# Class #10 Notes

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

#### Worldwide Choropleth Map

In [2]:
# Load the internet-usage dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv("internet_usage.csv")
# Show (rows, columns) as a quick sanity check on the load.
df.shape

(6056, 4)

In [3]:
# Preview the first rows to see the column layout (country, code, year, usage percentage).
df.head()

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
0,Afghanistan,AFG,1990,0.0
1,Afghanistan,AFG,2001,0.004723
2,Afghanistan,AFG,2002,0.004561
3,Afghanistan,AFG,2003,0.087891
4,Afghanistan,AFG,2004,0.105809


In [4]:
# Preview the last rows to check how the file ends.
df.tail()

Unnamed: 0,Country,Code,Year,Individuals using the Internet (% of population)
6051,Zimbabwe,ZWE,2012,12.0
6052,Zimbabwe,ZWE,2013,15.5
6053,Zimbabwe,ZWE,2014,16.36474
6054,Zimbabwe,ZWE,2015,22.742818
6055,Zimbabwe,ZWE,2016,23.119989


In [5]:
# Check dtypes and missing values; in the saved output `Code` is the only column with nulls.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6056 entries, 0 to 6055
Data columns (total 4 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Country                                           6056 non-null   object 
 1   Code                                              4912 non-null   object 
 2   Year                                              6056 non-null   int64  
 3   Individuals using the Internet (% of population)  6056 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 189.4+ KB


In [6]:
# Summary statistics for the numeric columns (year range and usage-percentage distribution).
df.describe()

Unnamed: 0,Year,Individuals using the Internet (% of population)
count,6056.0,6056.0
mean,2004.076783,21.14759
std,8.236665,26.500519
min,1960.0,0.0
25%,1998.0,0.558526
50%,2005.0,7.352044
75%,2011.0,35.909658
max,2017.0,98.32361


In [13]:
# Give the long indicator column a short name so later cells can reference it easily.
# Rebinding the result (instead of inplace=True) follows current pandas guidance and
# keeps the cell safe to re-run: renaming an already-renamed frame is a no-op.
df = df.rename(columns={'Individuals using the Internet (% of population)': 'internet_usage'})

In [14]:
# Restrict the data to the rows recorded for 2016.
df_2016 = df.loc[df['Year'] == 2016]

In [15]:
# Preview the first 2016 rows; the original row labels are kept, so the index is no longer contiguous.
df_2016.head()

Unnamed: 0,Country,Code,Year,internet_usage
16,Afghanistan,AFG,2016,10.595726
39,Albania,ALB,2016,66.363445
63,Algeria,DZA,2016,42.945527
85,Andorra,AND,2016,97.930637
107,Angola,AGO,2016,13.0


In [16]:
# Preview the last 2016 rows. Note the saved output includes an aggregate "World" row
# (code OWID_WRL), so not every row is an individual country.
df_2016.tail()

Unnamed: 0,Country,Code,Year,internet_usage
5960,Vietnam,VNM,2016,46.5
5985,World,OWID_WRL,2016,45.793684
6007,Yemen,YEM,2016,24.579208
6031,Zambia,ZMB,2016,25.506579
6055,Zimbabwe,ZWE,2016,23.119989


In [11]:
# Check dtypes and missing values for the 2016 subset.
# NOTE(review): the saved output for this cell looks stale. Its execution count (11) precedes
# the rename cell (13) and it still lists the pre-rename column name. Restart the kernel and
# run all cells to refresh it.
df_2016.info()

<class 'pandas.core.frame.DataFrame'>
Index: 249 entries, 16 to 6055
Data columns (total 4 columns):
 #   Column                                            Non-Null Count  Dtype  
---  ------                                            --------------  -----  
 0   Country                                           249 non-null    object 
 1   Code                                              203 non-null    object 
 2   Year                                              249 non-null    int64  
 3   Individuals using the Internet (% of population)  249 non-null    float64
dtypes: float64(1), int64(1), object(2)
memory usage: 9.7+ KB


In [17]:
# Summary statistics for 2016 only (Year is constant, so only internet_usage is informative).
df_2016.describe()

Unnamed: 0,Year,internet_usage
count,249.0,249.0
mean,2016.0,50.268889
std,0.0,27.495668
min,2016.0,1.177119
25%,2016.0,25.24625
50%,2016.0,52.191326
75%,2016.0,74.587726
max,2016.0,98.240016


In [None]:
# Choropleth of 2016 internet usage, cropped to Africa.
fig = px.choropleth(
    data_frame=df_2016,
    locations="Code",          # three-letter codes (e.g. AFG) matched against map shapes
    hover_name="Country",      # bold heading of each hover label
    color="internet_usage",    # value that drives the fill colour
    color_continuous_scale=px.colors.sequential.Jet,
    width=1000,
)

# Title, figure height and the continent shown.
# Other scope values include 'north america' and 'europe'.
fig.update_layout(
    title=dict(text="Internet Usage Across Africa in 2016"),
    height=1000,
    geo=dict(scope="africa"),
)

# A bare figure as the last expression renders it in the notebook.
fig

In [29]:
# World-wide choropleth of 2016 internet usage.
fig = px.choropleth(
    data_frame=df_2016,
    locations="Code",
    hover_name="Country",
    color="internet_usage",
    color_continuous_scale=px.colors.sequential.Jet,
    width=1000,
)

fig.update_layout(
    title_text="Internet Usage Across the World in 2016",
    width=1000,
    height=1000,
    # Mercator is a flat (cylindrical) projection; use "orthographic" for a globe.
    geo_projection_type="mercator",
    geo_showcoastlines=True,  # draw coastlines for better orientation
    geo_showland=True,        # fill landmasses that have no data
    # NOTE(review): confirm that "zoom" gives the intended drag/scroll behaviour on a geo map.
    dragmode="zoom",
)
fig.show()


#### Animations

In [34]:
# Sort chronologically so the animation frames (built in order of first appearance
# in the data) run from the oldest year to the newest.
# Rebinding instead of inplace=True leaves `df` in the same sorted state for later
# cells while keeping this cell safe to re-run.
df = df.sort_values(by="Year")

# Animated choropleth: one frame per year.
fig = px.choropleth(
    df,
    locations="Code",
    color="internet_usage",
    hover_name="Country",
    animation_frame="Year",
    color_continuous_scale=px.colors.sequential.Jet,
    # The metric is a percentage of the population, so pin the colour axis to 0-100.
    # Without a fixed range the scale is derived from the data being drawn, which makes
    # colours incomparable between years (early years are almost all zero).
    range_color=(0, 100),
)

fig.update_layout(
    title_text="Internet Usage Across the World by Year",
    width=1000,
    height=1000,
    geo=dict(
        projection_type="orthographic",  # Renders the map as a globe
        showcoastlines=True,  # Show coastlines for better visualization
        showland=True,  # Display landmass
    ),
    # NOTE(review): confirm that "zoom" gives the intended drag/scroll behaviour on a geo map.
    dragmode="zoom"
)
fig.show()

## Walmart Store Openings Data

In [36]:
# Load the Walmart store-openings dataset; the relative path is resolved against the kernel's working directory.
walmart_df = pd.read_csv("walmart_store_openings.csv")

# Show (rows, columns) as a quick sanity check on the load.
walmart_df.shape

(2992, 16)

In [37]:
# Preview the first rows: one row per store, with address, coordinates and opening date parts.
walmart_df.head()

Unnamed: 0,storenum,OPENDATE,date_super,conversion,st,county,STREETADDR,STRCITY,STRSTATE,ZIPCODE,type_store,LAT,LON,MONTH,DAY,YEAR
0,1,7/1/62,3/1/97,1.0,5,7,2110 WEST WALNUT,Rogers,AR,72756,Supercenter,36.342235,-94.07141,7,1,1962
1,2,8/1/64,3/1/96,1.0,5,9,1417 HWY 62/65 N,Harrison,AR,72601,Supercenter,36.236984,-93.09345,8,1,1964
2,4,8/1/65,3/1/02,1.0,5,7,2901 HWY 412 EAST,Siloam Springs,AR,72761,Supercenter,36.179905,-94.50208,8,1,1965
3,8,10/1/67,3/1/93,1.0,5,29,1621 NORTH BUSINESS 9,Morrilton,AR,72110,Supercenter,35.156491,-92.75858,10,1,1967
4,7,10/1/67,,,5,119,3801 CAMP ROBINSON RD.,North Little Rock,AR,72118,Wal-Mart,34.813269,-92.30229,10,1,1967


In [38]:
# Preview the last rows; in the saved output these are the 2006 openings.
walmart_df.tail()

Unnamed: 0,storenum,OPENDATE,date_super,conversion,st,county,STREETADDR,STRCITY,STRSTATE,ZIPCODE,type_store,LAT,LON,MONTH,DAY,YEAR
2987,5403,1/27/06,1/27/06,0.0,17,19,100 S High Cross Rd,Urbana,IL,61802,Supercenter,40.121648,-88.17649,1,27,2006
2988,3347,1/23/06,1/23/06,0.0,12,105,7450 Cypress Gardens Blvd,Winter Haven,FL,33884,Supercenter,27.997387,-81.68256,1,23,2006
2989,5485,1/27/06,,,17,31,2500 W 95th St,Evergreen Park,IL,60805,Wal-Mart,41.719933,-87.70249,1,27,2006
2990,3425,1/27/06,1/27/06,0.0,48,201,9598 Rowlett Rd,Houston,TX,77034,Supercenter,29.63643,-95.21789,1,27,2006
2991,5193,1/31/06,,,6,65,12721 Moreno Beach Dr,Moreno Valley,CA,92555,Wal-Mart,33.922823,-117.16837,1,31,2006


In [39]:
# Check dtypes and missing values; in the saved output only date_super and conversion contain nulls.
walmart_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2992 entries, 0 to 2991
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   storenum    2992 non-null   int64  
 1   OPENDATE    2992 non-null   object 
 2   date_super  1946 non-null   object 
 3   conversion  1946 non-null   float64
 4   st          2992 non-null   int64  
 5   county      2992 non-null   int64  
 6   STREETADDR  2992 non-null   object 
 7   STRCITY     2992 non-null   object 
 8   STRSTATE    2992 non-null   object 
 9   ZIPCODE     2992 non-null   int64  
 10  type_store  2992 non-null   object 
 11  LAT         2992 non-null   float64
 12  LON         2992 non-null   float64
 13  MONTH       2992 non-null   int64  
 14  DAY         2992 non-null   int64  
 15  YEAR        2992 non-null   int64  
dtypes: float64(3), int64(7), object(6)
memory usage: 374.1+ KB


In [40]:
# Summary statistics for the numeric columns (e.g. LAT/LON extents and the YEAR range).
walmart_df.describe()

Unnamed: 0,storenum,conversion,st,county,ZIPCODE,LAT,LON,MONTH,DAY,YEAR
count,2992.0,1946.0,2992.0,2992.0,2992.0,2992.0,2992.0,2992.0,2992.0,2992.0
mean,1809.838235,0.800103,28.956217,101.030414,54077.080882,36.80316,-91.582521,6.432487,15.108623,1990.793783
std,1362.727189,0.400026,16.061538,105.534836,23652.129967,4.818443,12.051487,3.492092,10.999463,8.572069
min,1.0,0.0,1.0,1.0,10547.0,25.431506,-124.21086,1.0,1.0,1962.0
25%,777.75,1.0,16.0,37.0,33522.0,33.415925,-96.681565,3.0,3.0,1985.0
50%,1540.5,1.0,29.0,77.0,52648.0,36.753618,-88.95525,7.0,16.0,1990.0
75%,2534.25,1.0,45.0,125.0,75061.5,40.409729,-82.61928,10.0,26.0,1998.0
max,5498.0,1.0,56.0,840.0,99352.0,48.759079,-72.637078,12.0,31.0,2006.0


In [None]:
# Plot every store as a marker on a US map, positioned by its longitude/latitude
# and labelled with its street address.
fig = go.Figure(
    data=[
        go.Scattergeo(
            lat=walmart_df["LAT"],
            lon=walmart_df["LON"],
            text=walmart_df["STREETADDR"],
            mode="markers",
        )
    ]
)

# Title the figure and limit the base map to the United States.
fig.update_layout(
    title="Walmart Stores in the US",
    geo=dict(scope="usa"),
)

fig.show()

#### Bubble Plots

In [43]:
# Walmart stores by state.
# groupby(...).count() returns, for every remaining column, the number of non-null
# values per state. Columns with missing data (date_super, conversion) can therefore
# show smaller counts than the rest.
walmart_state = walmart_df.groupby('STRSTATE').count()
# Inspect the shape and dtypes of the aggregated frame (one row per state, indexed by STRSTATE).
walmart_state.info()

<class 'pandas.core.frame.DataFrame'>
Index: 41 entries, AL to WY
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   storenum    41 non-null     int64
 1   OPENDATE    41 non-null     int64
 2   date_super  41 non-null     int64
 3   conversion  41 non-null     int64
 4   st          41 non-null     int64
 5   county      41 non-null     int64
 6   STREETADDR  41 non-null     int64
 7   STRCITY     41 non-null     int64
 8   ZIPCODE     41 non-null     int64
 9   type_store  41 non-null     int64
 10  LAT         41 non-null     int64
 11  LON         41 non-null     int64
 12  MONTH       41 non-null     int64
 13  DAY         41 non-null     int64
 14  YEAR        41 non-null     int64
dtypes: int64(15)
memory usage: 5.1+ KB


In [44]:
# Preview the per-state counts; every fully populated column carries the same number.
walmart_state.head()

Unnamed: 0_level_0,storenum,OPENDATE,date_super,conversion,st,county,STREETADDR,STRCITY,ZIPCODE,type_store,LAT,LON,MONTH,DAY,YEAR
STRSTATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AL,90,90,76,76,90,90,90,90,90,90,90,90,90,90,90
AR,81,81,60,60,81,81,81,81,81,81,81,81,81,81,81
AZ,55,55,40,40,55,55,55,55,55,55,55,55,55,55,55
CA,159,159,13,13,159,159,159,159,159,159,159,159,159,159,159
CO,56,56,43,43,56,56,56,56,56,56,56,56,56,56,56


In [45]:
# Build a tidy two-column table: state abbreviation and number of stores.
# It is derived directly from walmart_df rather than from the previous value of
# `walmart_state`, so the cell is idempotent. The earlier form raised KeyError on a
# second run because 'storenum' had already been renamed to 'num_stores'.
# storenum has no missing values, so its non-null count equals the number of stores.
walmart_state = (
    walmart_df
    .groupby('STRSTATE')['storenum']
    .count()
    .reset_index(name='num_stores')
)
walmart_state.head()

Unnamed: 0,STRSTATE,num_stores
0,AL,90
1,AR,81
2,AZ,55
3,CA,159
4,CO,56


In [47]:
# Bubble map: one bubble per state, with area scaled by the number of stores.
fig = px.scatter_geo(
    data_frame=walmart_state,
    locations="STRSTATE",
    locationmode="USA-states",  # interpret `locations` as two-letter state abbreviations
    hover_name="STRSTATE",
    size="num_stores",
    size_max=20,                # cap on the largest bubble's size
)

# Title the figure and limit the base map to the United States.
fig.update_layout(
    title=dict(text="Walmart Stores in the US"),
    geo=dict(scope="usa"),
)

fig.show()