<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
!pip install -U kaleido

In [None]:
import datetime
import math
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from matplotlib import dates as mdates
from plotly.subplots import make_subplots
from scipy.stats import gamma,uniform,randint,norm,triang

In [None]:
#################
# Bar chart
#################

# x-axis categories; "\n" splits two-word names across two lines
countries = [
    "Australia", "China", "Hong\nKong", "India", "Indonesia", "Japan",
    "Malaysia", "New\nZealand", "Singapore", "South\nKorea", "Sri Lanka",
    "Taiwan", "Thailand", "Vietnam"
]

# Synthetic data: 14 uniform draws in [6, 20], seeded so reruns give the same bars
random.seed(0)
speeds = [random.uniform(6, 20) for _ in range(14)]
print(speeds)
print(np.mean(speeds))
print(np.max(speeds))

font = {'fontname':'Arial'}

# Work on explicit figure/axes handles rather than the implicit pyplot state
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(countries, speeds, color="#5f89c2")

# Left-aligned title; the axis captions are pushed to the far ends of their axes
ax.set_title('Average Internet Speeds in Asia', **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
ax.set_xlabel('Country', **font, fontsize=12, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.05)
ax.set_ylabel('Speed (Mbps)', **font, fontsize=12, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)

ax.set_ylim([0, 23])
ax.set_xlim(-0.5, 14)

# Layout first, then horizontal grid lines, then export and display
fig.tight_layout()
ax.grid(axis='y', color='gray', linestyle='-', linewidth=0.5)
fig.savefig("./bar chart.svg", bbox_inches='tight')
plt.show()



In [None]:
#################
# Line chart
#################

# One x-axis category per calendar month
months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
]

# Fixed price series, one value per month (hard-coded rather than drawn at random)
oil_prices = [56.27, 45.61, 47.49, 43.77, 39.26, 37.36, 40.93, 54.28, 58.40, 59.88, 60.13, 63.73]

font = {'fontname':'Arial'}

# Integer positions 0..11 used as x coordinates; month names are attached as tick labels
x = np.arange(len(months))

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x, oil_prices, marker='o', color="#5f89c2")

# Left-aligned title; the axis captions are pushed to the far ends of their axes
ax.set_title('Monthly Oil Price History in 2015', **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
ax.set_xlabel('Month', **font, fontsize=12, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.07)
ax.set_ylabel('Oil Price ($ per barrel)', **font, fontsize=12, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)

# Show month names at the integer positions and clamp both axes
ax.set_xticks(x)
ax.set_xticklabels(months)
ax.set_ylim([35, 65])
ax.set_xlim(0, 11)

# Grid on both axes, then layout, export and display
ax.grid(color='gray', linestyle='-', linewidth=0.5)
fig.tight_layout()
fig.savefig("./line chart.svg", bbox_inches='tight')
plt.show()



In [None]:
#################
# 100% Stacked Bar chart
#################

random.seed(3)

categories = ['High School Graduate or Less', 'Some College Degree', 'College Graduate', 'Postgraduate Study']

# Synthetic raw counts per education level. The draw order (Democrats, Republicans,
# Other) determines the values produced by the seeded generator, so it must not change.
democrats = [random.randint(1, 60) for _ in range(4)]
republicans = [random.randint(1, 60) for _ in range(4)]
others = [random.randint(1, 10) for _ in range(4)]

print(democrats, republicans, others)

font = {'fontname':'Arial'}

# Rescale every column so its three segments add up to 100
totals = [sum(group) for group in zip(democrats, republicans, others)]
democrats_percent = [count / total * 100 for count, total in zip(democrats, totals)]
republicans_percent = [count / total * 100 for count, total in zip(republicans, totals)]
others_percent = [count / total * 100 for count, total in zip(others, totals)]

# Height of the Democrats + Other segments: the base on which the Republicans segment sits
cumulative_republicans_democrats = np.array(others_percent) + np.array(democrats_percent)

fig, ax = plt.subplots(figsize=(10, 5))

# Stack bottom-to-top: Democrats, Other, Republicans
ax.bar(categories, democrats_percent, label='Democrats', color='#5f89c2')
ax.bar(categories, others_percent, bottom=democrats_percent, label='Other', color='#949494')
ax.bar(categories, republicans_percent, bottom=cumulative_republicans_democrats,
       label='Republicans', color='#c23c3f')

# Axis captions at the ends of the axes, left-aligned title
ax.set_xlabel('Education', **font, fontsize=12, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.11)
ax.set_ylabel('Approval Rating', **font, fontsize=12, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)
ax.set_title('Election Exit Poll of California State by Education', **font, fontsize=25,
             fontweight='bold', loc='left', pad=27, x=-0.055)

# Legend is anchored to the right of the plotting area
ax.legend(loc="upper right", bbox_to_anchor=(1.2, 1), handlelength=0.8)
ax.set_ylim(0, 100)
ax.set_xlim(-0.6, 3.6)

ax.grid(axis='y', color='gray', linestyle='-', linewidth=0.5)
fig.savefig("./100 stacked bar chart.png", bbox_inches='tight')
plt.show()



In [None]:
#################
# Treemap
#################

# Leaf labels, their parent category, and the leaf sizes are three parallel lists:
# entry i of each list describes the same website.
labels = ['Google', 'Bing', 'Yahoo!', 'Facebook', 'Amazon', 'eBay',
          'Twitter', 'LinkedIn', 'Classmates', 'AOL', 'Ask', 'Wal-Mart',
          'Craigslist', 'Target', 'Best Buy', 'Sears', 'Apple', 'Samsung', 'HP',
          'Dell', 'Bank of America', 'Experian', 'Chase', 'Citibank', 'PayPal',
          'NY Times', 'CNN', 'ESPN', 'NBC\nUniversal', 'BBC', 'Fox Media', 'CBS', 'Fox News']
categories = ['Search', 'Search', 'Search', 'Social Media', 'Retail', 'Retail',
              'Social Media', 'Social Media', 'Social Media', 'Search', 'Search', 'Retail',
              'Retail', 'Retail', 'Retail', 'Retail', 'Computer', 'Computer', 'Computer',
              'Computer', 'Financial', 'Financial', 'Financial', 'Financial', 'Financial',
              'News', 'News', 'News', 'News', 'News', 'News', 'News', 'News']
sizes = [8, 13, 3, 9, 7, 13,
         9, 11, 2, 7, 3, 14,
         2, 10, 14, 5, 3, 8, 12,
         4, 20, 3, 4, 17, 7,
         7, 11, 3, 6, 5, 12, 3, 13]

# One row per website, plus a constant "websites" column that serves as the single root node
df = pd.DataFrame({'labels': labels,
                   'categories': categories,
                   'sizes': sizes}).assign(websites="websites")

# Hierarchy: root -> category -> website, with tile area taken from `sizes`
fig = px.treemap(df,
                 path=['websites', 'categories', 'labels'],
                 values='sizes',
                 template='seaborn')
fig.update_traces(root_color="lightgrey")
fig.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
fig.show()

In [None]:
#################
# area chart
#################

# `mdates` (matplotlib.dates) and `datetime` are imported in the notebook's import cell.

# The prices below are hard-coded; the seed only resets the global `random` state.
random.seed(2)

# One timestamp per month start, January 2013 through December 2014
months = pd.date_range('2013-01', '2014-12', freq='MS')

# Fixed price series, one value per month; the literal list is reversed before plotting
coffee_prices = list(reversed([6.120861689400649, 5.903897980633009, 5.4060894767068294, 
                 6.1089118515678384, 6.170675213309537, 5.605514722592398, 5.652769591286059, 
                 5.482833334968791, 5.724745980323342, 5.546456236604595, 
                 5.4906994982212325, 5.392243120553509, 5.246167230801606, 4.98508916645865, 
                 5.175205352524284, 5.108357276369668, 5.301421746227439,  
                 5.379518685372778, 5.2007375397065365, 4.901792461908256,
                 4.764663792907144, 4.6494007427634745, 5.136808951775162, 4.97323723013662]))
print(coffee_prices)
font = {'fontname':'Arial'}

# Creating the area chart
fig, ax = plt.subplots(1, 1, figsize=(10, 5))

# Shaded area under the price curve
ax.fill_between(months, coffee_prices, color="#5f89c2", alpha=0.4)

# Tick locators: one minor tick per month, one major tick per year
fmt_month = mdates.MonthLocator()
fmt_year = mdates.YearLocator()

# Both minor and major ticks on the primary axis show the abbreviated month name ('%b')
ax.xaxis.set_minor_locator(fmt_month)
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))
ax.xaxis.set_major_locator(fmt_year)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))

# Same font size for major and minor (month) tick labels
ax.tick_params(labelsize=10, which='both')

# Second x-axis placed below the first one, labelled with the year in YYYY format
sec_xaxis = ax.secondary_xaxis(-0.1)
sec_xaxis.xaxis.set_major_locator(fmt_year)
sec_xaxis.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Hide the second x-axis spine and tick marks so only the year labels remain
sec_xaxis.spines['bottom'].set_visible(False)
sec_xaxis.tick_params(length=0, labelsize=10)

# Outline of the area, drawn on the same axes as the fill
ax.plot(months, coffee_prices, color="#5f89c2", alpha=0.6)

# Left-aligned title; the axis captions are pushed to the far ends of their axes
ax.set_title('Average Coffee Bean Price from 2013 to 2014', **font, fontsize=25, fontweight='bold', loc='left', pad=27, x=-0.055)
ax.set_xlabel('Month', **font, fontsize=12, fontweight='bold', horizontalalignment='right', labelpad=-20,x=1.08)
ax.set_ylabel('Price ($ per lb)', **font, fontsize=12, fontweight='bold', rotation=0, verticalalignment='top', labelpad=-40, y=1.06)

ax.set_ylim(4.4, 6.2)
ax.set_xlim([datetime.date(2013, 1, 1), datetime.date(2014, 12, 1)])

ax.grid(color='gray', linestyle='-', linewidth=0.5)

fig.tight_layout()
# Display the chart
plt.show()

In [None]:
#################
# stacked bar chart
#################

font = {'fontname':'Arial'}
random.seed(3)

# x-axis categories; "\n" splits long city names across two lines
cities = ['New York\nCity', 'Las Vegas', 'Honolulu', 'Atlanta', 'Boston',
          'Washington\nD.C.', 'Chicago', 'Orlando', 'San\nFrancisco', 'Seattle']

# Synthetic item costs, 10 uniform draws in [3, 20] per item. The draw order
# (sandwich, water, peanuts, soda, vodka) fixes the values produced by the seed.
sandwich = [random.uniform(3, 20) for _ in range(10)]
water = [random.uniform(3, 20) for _ in range(10)]
peanuts = [random.uniform(3, 20) for _ in range(10)]
soda = [random.uniform(3, 20) for _ in range(10)]
vodka = [random.uniform(3, 20) for _ in range(10)]

for series in (sandwich, water, peanuts, soda, vodka):
    print(series)

# Running totals: each layer starts where the layers beneath it end
water_bottom = np.array(sandwich)
peanuts_bottom = water_bottom + np.array(water)
soda_bottom = peanuts_bottom + np.array(peanuts)
vodka_bottom = soda_bottom + np.array(soda)

fig, ax = plt.subplots(figsize=(10, 5))

# Layers are added top-down so the legend lists them in the same order as the stack.
# The bottom layer has no offset (bottom=None is matplotlib's default).
stack = [
    ('Vodka', vodka, vodka_bottom, '#e67a24'),
    ('Soda', soda, soda_bottom, '#89569b'),
    ('Peanuts', peanuts, peanuts_bottom, '#e21d3c'),
    ('Water', water, water_bottom, '#5fb147'),
    ('Sandwich', sandwich, None, '#4b90c6'),
]
for layer_name, layer_values, layer_base, layer_color in stack:
    ax.bar(cities, layer_values, bottom=layer_base, label=layer_name, color=layer_color)

# Axis captions at the ends of the axes, left-aligned title
ax.set_ylabel('Cost ($)', **font, fontsize=12, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)
ax.set_xlabel('City', **font, fontsize=12, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.05)
ax.set_title('Hotel Costs of Room Service', **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
ax.set_xticks(range(len(cities)))
ax.set_xticklabels(cities)
ax.set_ylim(0, 80)
ax.set_xlim(-0.6, 9.6)
ax.legend(loc="upper right", bbox_to_anchor=(1.15, 1), handlelength=0.8)

ax.grid(axis='y', color='gray', linestyle='-', linewidth=0.5)
fig.savefig("./stacked bar chart.png", bbox_inches='tight')
plt.show()

In [None]:
#################
# Pie chart
#################

font = {'fontname':'Arial'}

random.seed(0)

# Six wedges; sizes are seeded random integers in [10, 40] (inclusive on both ends)
labels = ('Apple', 'Samsung', 'Huawei', 'Lenovo', 'Xiaomi', 'Others')
sizes = [random.randint(10, 40) for _ in range(6)]
print(sizes)

# One fill colour per wedge, in the same order as `labels`
colors = ['#d32938', '#518bc9', '#62af46', '#89569b', '#e67a24', '#afafb0']

fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, colors=colors, startangle=140)

# Equal aspect ratio keeps the pie circular
ax.axis('equal')
ax.set_title('Global Smartphone Market Share (%)', **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
fig.savefig("./pie chart.png", bbox_inches='tight')
plt.show()

In [None]:
#################
# Stacked Area chart
#################

font = {'fontname':'Arial'}

# The series below are hard-coded; the seed only resets the global `random` state.
random.seed(9)

# x values: 2009 through 2014 inclusive
years = np.arange(2009, 2015)

# Illustrative (not real) yearly counts for each name, one value per year
amelia = [15585, 13116, 11376, 9269, 10049, 7105]
isla = [5871, 7118, 6798, 4953, 5062, 6536]
olivia = [3670, 3525, 3867, 1167, 3982, 2552]

fig, ax = plt.subplots(figsize=(10, 5))

# Series are stacked in argument order: Amelia at the bottom, Olivia on top
ax.stackplot(years, amelia, isla, olivia,
             labels=['Amelia', 'Isla', 'Olivia'],
             colors=['#7ba7d3', '#9fcbe7', '#d5e6ef'])

# Left-aligned title; the axis captions are pushed to the far ends of their axes
ax.set_title("Popular Girls' Names in the UK", **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
ax.set_ylabel('Number of Girls', **font, fontsize=12, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)
ax.set_xlabel('Year', **font, fontsize=12, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.1)

ax.set_ylim(0, 30000)
ax.set_xlim(2009, 2014)

# Legend is anchored to the right of the plotting area
ax.legend(loc='upper right', bbox_to_anchor=(1.15, 1), handlelength=0.8)

ax.grid(color='gray', linestyle='-', linewidth=0.5)
fig.savefig("./stacked area chart.png", bbox_inches='tight')
plt.show()

In [None]:
#################
# Scatter Plot
#################

font = {'fontname':'Arial'}

# 85 simulated (height, weight) pairs drawn from independent normal distributions.
# The seed makes the draws reproducible; heights must be drawn before weights.
np.random.seed(0)
heights = np.random.normal(175, 10, 85)
weights = np.random.normal(75, 15, 85)

fig, ax = plt.subplots(figsize=(10, 5))

# `s` is the marker size; markers have no outline
scatter = ax.scatter(heights, weights, color='#5f89c2', edgecolor='none', s=50)

# Left-aligned title; the axis captions are pushed to the far ends of their axes
ax.set_title('Height vs. Weight of 85 Males', **font, fontsize=25, fontweight='bold',
             loc='left', pad=27, x=-0.055)
ax.set_xlabel('Height (cm)', fontsize=12, **font, fontweight='bold',
              horizontalalignment='right', labelpad=-20, x=1.155)
ax.set_ylabel('Weight (kg)', fontsize=12, **font, fontweight='bold', rotation=0,
              verticalalignment='top', labelpad=-40, y=1.06)

# Fixed viewing window; draws that fall outside it are not visible
ax.set_xlim(160, 200)
ax.set_ylim(40, 140)

# Reserve white space on the right and extra room above the axes for the title
fig.subplots_adjust(right=0.83, top=0.82)

ax.grid(color='gray', linestyle='-', linewidth=0.5)

fig.savefig("./scatter plot.svg", bbox_inches='tight')
plt.show()

In [None]:
# Shared settings for the Plotly cells below
pio.templates.default = "plotly_white"  # default template for Plotly figures
randState = 47                          # seed passed as random_state to the scipy.stats draws
defaultColor = "#5F89C2"                # marker/bar colour used by the Plotly charts


# Plots

## Histogram


In [None]:
# Parameters of the normal distribution (mean `a`, standard deviation `scaleVal`)
# and the interval [minVal, maxVal] the samples are restricted to
a = 4.2
scaleVal = 0.5

minVal = 3
maxVal = 5

# CDF values at the two cut-offs
minCDF = norm.cdf(minVal, a, scale=scaleVal)
maxCDF = norm.cdf(maxVal, a, scale=scaleVal)

# Inverse-transform sampling: map uniform draws into [minCDF, maxCDF] and push them
# through the normal quantile function, which yields values between minVal and maxVal
uniList = uniform.rvs(size=2000, random_state=randState)
numList = [norm.ppf(u*(maxCDF-minCDF)+minCDF, a, scale=scaleVal) for u in uniList]
df = pd.DataFrame({'Rating': numList})

fig = px.histogram(df, x='Rating', color_discrete_sequence=[defaultColor])

# White bar outlines and fixed 0.2-wide bins spanning [minVal, maxVal]
fig.update_traces(marker_line_width=1,
                  marker_line_color='white',
                  xbins={'start': minVal, 'end': maxVal, 'size': 0.2})

# Styling shared by both axes: visible black axis line, outside ticks, no built-in title
axisStyle = {'showline': True,
             'title': {'text': None},
             'linewidth': 2,
             'linecolor': 'black',
             'ticks': 'outside'}
fig.update_layout(title={'text': '<b>Taxi Passenger Ratings</b>',
                         'font': {'family': 'Arial', 'color': 'black', 'size': 30}},
                  yaxis={**axisStyle, 'range': [0, 350]},
                  xaxis=axisStyle,
                  margin={'r': 50, 'b': 10})

# Axis captions are drawn as paper-referenced annotations at the ends of the axes
labelFont = {'family': 'Arial', 'color': 'black', 'size': 12}
fig.add_annotation(x=0, y=1.05,
                   xref='paper', yref='paper',
                   text='<b>Frequency</b>',
                   font=labelFont,
                   xanchor='center',
                   showarrow=False)
fig.add_annotation(x=1, y=0,
                   xref='paper', yref='paper',
                   text='<b>Rating</b>',
                   font=labelFont,
                   xanchor='left',
                   yanchor='middle',
                   showarrow=False)
fig.show()

In [None]:
# Debug aid: print the text representation of the histogram figure object
print(fig)

In [None]:
# Export the histogram figure as a static image with a fixed width and height
fig.write_image('histogram.png', width=1233, height=600)

## Bubble Chart


In [None]:
# Bubble chart: stations (x) vs. system length (y), bubble area = ridership.
# All values are synthetic. A hand-built bubble-size legend is drawn on a
# second pair of axes (x2/y2) occupying the right 15% of the figure width.

# Original city names, replaced by the anonymous placeholders below:
# cityList = ['New York City',
#             'Shanghai',
#             'Beijing',
#             'London',
#             'Seoul',
#             'Paris',
#             'Moscow',
#             'Mexico City',
#             'Tokyo',
#             'Guangzhou',
#             'Delhi']

cityList = ['City A',
            'City B',
            'City C',
            'City D',
            'City E',
            'City F',
            'City G',
            'City H',
            'City I',
            'City J',
            'City K']

# Seeded random station counts, one per city
numStationsList = randint.rvs(100,450,size=len(cityList),random_state=randState)
# System length is a linear function of the station count plus a hand-picked per-city offset.
# NOTE: it is computed from the station counts BEFORE they are nudged two lines below,
# so the order of these statements matters.
sysNudgeList = [-50,-100,-200,150,-10,-300,350,100,10,-90,-20]
sysLenList = [5*x/3-75+nudge for x,nudge in zip(numStationsList,sysNudgeList)]
# Hand-picked per-city offsets applied to the station counts themselves
staNudgeList = [0,-10,0,40,0,-10,0,10,0,0,-20]
numStationsList = [x+nudge for x,nudge in zip(numStationsList,staNudgeList)]
#ridershipList = uniform.rvs(size=len(cityList),random_state=randState)*4
# Ridership is a log-scaled function of the (nudged) station count. The reference
# bounds are shifted (min-10, max-1), so the smallest value is above 0 and the
# largest is slightly above 3.5.
minSta = min(numStationsList)-10
maxSta = max(numStationsList)-1
ridershipList = [3.5*(math.log(x)-math.log(minSta))/(math.log(maxSta)-math.log(minSta)) for x in numStationsList]
print(ridershipList)
df = pd.DataFrame({'city':cityList,
                   'Number of Stations':numStationsList,
                   'Total System Length (km)':sysLenList,
                   'Ridership (bn per year)':ridershipList})
# Index by city name so the annotation loop below can look rows up with df.at[city, ...]
df.index = df['city']

# Three reference bubbles for the size legend, positioned in x2/y2 coordinates
legendDF = pd.DataFrame({'x':3*[1],
                         'y':[5,6.625,8.5],
                         'markerSizes':[1.5, 2.5, 3.5]})

# Data bubbles; the legend trace below uses the same sizemode and sizeref,
# so legend bubbles are scaled the same way as the data bubbles
trace = go.Scatter(x=numStationsList,
                   y=sysLenList,
                   marker={'color':defaultColor,
                           'size':ridershipList,
                           'sizemode':'area',
                           'sizeref':0.001},
                   mode='markers',
                   showlegend=False)
# Legend bubbles: white fill with a black outline, drawn on the secondary axes
legendTrace = go.Scatter(x=list(legendDF['x']),
                         y=list(legendDF['y']),
                         mode='markers',
                         marker={'size':list(legendDF['markerSizes']),
                                 'sizemode': 'area',
                                 'color':'#FFFFFF',
                                 'line':{'width':2,
                                         'color':'black'},
                                 'sizeref':0.001},
                         showlegend=False,
                         xaxis='x2',
                         yaxis='y2')
fig = go.Figure(data=[trace,legendTrace])

# City name labels: offset right and down from each bubble centre by an amount
# proportional to its ridership value (xScale/yScale are in axis units per ridership unit)
xScale = 5
yScale = 10
for city in cityList:
    fig.add_annotation(x=df.at[city,'Number of Stations']+xScale*df.at[city,'Ridership (bn per year)'],
                       y=df.at[city,'Total System Length (km)']-yScale*df.at[city,'Ridership (bn per year)'],
                       text='<b>'+city+'</b>',
                       xanchor='left',
                       yanchor='top',
                       showarrow=False)
    
# Main axes take the left 85% of the width; x2/y2 (legend) take the remaining 15%
# with grid lines and tick labels hidden
fig.update_layout(title={'text':'<b>Metro Systems of the World</b>',
                         'font':{'family':'Arial',
                                 'color':'black',
                                 'size':30}},
                  yaxis={'showline':True,
                         'title':{'text':None},
                         'linewidth':2,
                         'linecolor':'black',
                         'range':[150,600],
                         'ticks':'outside'},
                  xaxis={'domain':[0, 0.85],
                         'showline':True,
                         'title':{'text':None},
                         'linewidth':2,
                         'linecolor':'black',
                         'range':[100,450],
                         'ticks':'outside'},
                  xaxis2={'domain':[0.85, 1],
                          'showgrid':False,
                          'showticklabels':False},
                  yaxis2={'anchor':'x2',
                          'range':[0,10],
                          'showgrid':False,
                          'showticklabels':False},
                  margin={'r':0,#200,
                          'b':10,'t':50})
# y-axis caption, placed above the top-left corner of the plot (paper coordinates)
fig.add_annotation(x=0,
                   y=1.05,
                   xref='paper',
                   yref='paper',
                   text='<b>Total System Length (km)</b>',
                   font={'family':'Arial',
                         'color':'black',
                         'size':12},
                   xanchor='center',
                   showarrow=False)
# Heading of the bubble-size legend, centred over the legend strip (0.85..1)
fig.add_annotation(x=0.925,
                   y=1.05,
                   xref='paper',
                   yref='paper',
                   text='<b>Ridership<br>(bn per year)</b>',
                   font={'family':'Arial',
                         'color':'black',
                         'size':12},
                   xanchor='center',
                   showarrow=False)
# x-axis caption, placed at the right end of the main x-axis
fig.add_annotation(x=0.85,
                   y=0,
                   xref='paper',
                   yref='paper',
                   text='<b>Number of Stations</b>',
                   font={'family':'Arial',
                         'color':'black',
                         'size':12},
                   xanchor='left',
                   yanchor='middle',
                   showarrow=False)
# Numeric label next to each legend bubble, one x2 unit to its right
for i in legendDF.index:
    fig.add_annotation(x=legendDF.at[i,'x']+1,
                       y=legendDF.at[i,'y'],
                       text='<b>'+str(legendDF.at[i,'markerSizes'])+'</b>',
                       xanchor='center',
                       yanchor='middle',
                       xref='x2',
                       yref='y2',
                       showarrow=False)
fig.show()

In [None]:
# Debug aid: print the text representation of the bubble-chart figure object
print(fig)

In [None]:
# Quick look at ridership against station count, with an OLS trendline overlaid
tempFig = px.scatter(df, x='Number of Stations', y='Ridership (bn per year)', trendline='ols')
tempFig.show()

In [None]:
# Export the bubble chart as a static image with a fixed width and height
fig.write_image('bubblePlot.png', width=1233, height=600)

## Scatter Plot


In [None]:
# Synthetic height/weight data for 85 males.
# Heights are uniform draws rescaled to [160, 200); weight is a linear function of
# height plus normal noise (standard deviation 10).
# NOTE(review): both draws reuse the same random_state seed - confirm this is intended.
maleHeightList = (uniform.rvs(size=85,random_state=randState)*(200-160))+160
normRandList = norm.rvs(0,10,size=85,random_state=randState)
maleWeightList = [4*x/3-160+rand for x,rand in zip(maleHeightList,normRandList)]

# Weight is measured in kilograms; the column name and the axis caption below say so
df = pd.DataFrame({'Height (cm)':maleHeightList,
                   'Weight (kg)':maleWeightList})
fig = px.scatter(df,
                 x='Height (cm)',
                 y='Weight (kg)',
                 color_discrete_sequence=[defaultColor])
# Built-in axis titles are disabled; captions are added as annotations further down
fig.update_layout(title={'text':'<b>Height vs. Weight of 85 Males</b>',
                         'font':{'family':'Arial',
                                 'color':'black',
                                 'size':30}},
                  yaxis={'showline':True,
                         'title':{'text':None},
                         'linewidth':2,
                         'linecolor':'black',
                         'range':[40,130],
                         'ticks':'outside'},
                  xaxis={'showline':True,
                         'title':{'text':None},
                         'linewidth':2,
                         'linecolor':'black',
                         'range':[160,200],
                         'ticks':'outside'},
                  margin={'r':75,
                          'b':10})
# y-axis caption, placed above the top-left corner of the plot (paper coordinates)
fig.add_annotation(x=0,
                   y=1.05,
                   xref='paper',
                   yref='paper',
                   text='<b>Weight (kg)</b>',
                   font={'family':'Arial',
                         'color':'black',
                         'size':12},
                   xanchor='center',
                   showarrow=False)
# x-axis caption, placed at the right end of the x-axis
fig.add_annotation(x=1,
                   y=0,
                   xref='paper',
                   yref='paper',
                   text='<b>Height (cm)</b>',
                   font={'family':'Arial',
                         'color':'black',
                         'size':12},
                   xanchor='left',
                   yanchor='middle',
                   showarrow=False)
fig.show()

In [None]:
# Debug aid: print the text representation of the scatter-plot figure object
print(fig)

In [None]:
# Export the scatter plot as a static image with a fixed width and height
fig.write_image('scatter.png', width=1233, height=600)

## Choropleth


In [None]:
# Only the state code and name columns of this public Plotly example dataset are kept;
# the values to plot are synthetic. `.copy()` makes the two-column frame independent
# of the frame it was sliced from, so adding a column below is a plain assignment.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
df = df[['code','state']].copy()

# Synthetic unemployment rates: a normal(mu, sigma) distribution restricted to
# [minVal, maxVal] via inverse-transform sampling.
mu = 0
sigma = 3.1
minVal = 0
maxVal = 7
# The CDF bounds must use the same scale as the ppf call below; otherwise the
# sampled values are not confined to [minVal, maxVal].
minCDF = norm.cdf(minVal,mu,scale=sigma)
maxCDF = norm.cdf(maxVal,mu,scale=sigma)

uniList = uniform.rvs(size=len(df),
                      random_state=randState)
numList = [norm.ppf(x*(maxCDF-minCDF)+minCDF,mu,scale=sigma) for x in uniList]

df['unemploy'] = pd.Series(numList)

# Discrete colour scale: each colour is listed at both ends of its interval, which
# produces seven flat bands (blueList[1]..blueList[7]) instead of a continuous gradient
blueList = px.colors.sequential.Blues
blueColors = [[0,blueList[0]],
              [0,blueList[1]],
              [1/7,blueList[1]],
              [1/7,blueList[2]],
              [2/7,blueList[2]],
              [2/7,blueList[3]],
              [3/7,blueList[3]],
              [3/7,blueList[4]],
              [4/7,blueList[4]],
              [4/7,blueList[5]],
              [5/7,blueList[5]],
              [5/7,blueList[6]],
              [6/7,blueList[6]],
              [6/7,blueList[7]],
              [1,blueList[7]],
              [1,blueList[8]]]

# NOTE(review): the colour-bar tick text shows x*1.15 rather than the tick value x
# itself - confirm this relabelling is intended.
data = go.Choropleth(locations=df['code'],
                     z=df['unemploy'].astype(float),
                     locationmode='USA-states',
                     colorscale=blueColors,
                     colorbar={'orientation':'h',
                               'x':0,
                               'y':0,
                               'xanchor':'left',
                               'yanchor':'top',
                               'len':0.35,
                               'ticklabelposition':'outside right',
                               'ticktext':['{:.1f}%'.format(x*1.15) for x in range(7)],
                               'tickvals':[x+0.05 if x==0 else x for x in range(7)]})
fig = go.Figure(data=data)

# Overlay each state's two-letter code in bold as a text-only trace
boldCodeList = ['<b>'+x+'</b>' for x in df['code']]
fig.add_scattergeo(locations=df['code'],
                   locationmode='USA-states',
                   text=boldCodeList,
                   textfont={'family':'Arial',
                             'size':12},
                   mode='text')
# White 1px outlines between states
fig.update_traces(marker_line_width=1,marker_line_color='white')
fig.update_layout(title={'text':'<b>Unemployment Rates for States in 2015</b>',
                         'font':{'family':'Arial',
                                 'color':'black',
                                 'size':30}},
                  geo_scope='usa')
fig.show()

In [None]:
# Debug aid: print the text representation of the choropleth figure object
print(fig)

In [None]:
# Export the choropleth as a static image with a fixed width and height
fig.write_image('choropleth.png', width=1233, height=600)