In [None]:
import numpy as np
import pandas as pd
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from tabulate import tabulate
import seaborn as sns

# 1. Data Exploration

In [None]:
# Load the raw mobility report into memory.
# low_memory=False makes pandas parse the file in a single pass rather than in
# chunks, which avoids mixed-dtype inference on wide columns.
df = pd.read_csv(
    "data/New_Global_Mobility_Report.csv",
    low_memory=False,
)

In [None]:
# Preview the first five rows of the raw report.
df.head(n=5)

In [None]:
# Distinct values of the raw country_region column.
df["country_region"].unique()

In [None]:
# (number of rows, number of columns) of the raw report.
df.shape

# 2. Dataframe Formatting

### Rename columns 

In [None]:
# Replace the verbose raw column names with the short labels used by every
# later cell. A single mapping is applied in one rename() call; none of the
# new names collides with an old one, so the order of entries is irrelevant.
df = df.rename(
    columns={
        'country_region': 'Country',
        'date': 'Date',
        'retail_and_recreation_percent_change_from_baseline': 'retail',
        'grocery_and_pharmacy_percent_change_from_baseline': 'pharmacy',
        'parks_percent_change_from_baseline': 'parks',
        'transit_stations_percent_change_from_baseline': 'transit_station',
        'workplaces_percent_change_from_baseline': 'workplaces',
        'residential_percent_change_from_baseline': 'residential',
    }
)


### Delete columns we will not use

In [None]:
# Remove the region/identifier columns that no later cell reads.
# The result is reassigned (instead of inplace=True) and errors='ignore' is
# passed so the cell can be executed more than once: on a second run the
# columns are already gone and the call is a no-op rather than a KeyError.
df = df.drop(
    columns=[
        'country_region_code',
        'sub_region_1',
        'sub_region_2',
        'census_fips_code',
        'iso_3166_2_code',
    ],
    errors='ignore',
)

## Create dataframe1 
Only looking at the data of six specific countries

In [None]:
# Mean of every metric for each (Country, Date) pair.
# numeric_only=True: since pandas 2.0, mean() raises TypeError when it meets a
# non-numeric column instead of silently skipping it, so any text column still
# present in df would break this cell. For all-numeric input the result is
# unchanged.
df1 = df.groupby(['Country', 'Date']).mean(numeric_only=True).reset_index()

# Countries shown in the comparison plot. The list order determines the row
# order of df2 (and therefore the order in which plotly meets each country).
countries = ["India", "Sweden", "United States", "New Zealand", "France", "Italy"]
frames = [df1[df1['Country'] == name] for name in countries]

# Keep the individual per-country frames available under their original names.
c1, c2, c3, c4, c5, c6 = frames

df2 = pd.concat(frames)

# Plot line graph

In [None]:
# Retail metric over time, one coloured line per selected country.
fig = px.line(
    data_frame=df2,
    x="Date",
    y="retail",
    color='Country',
    title='retail',
)
fig.show()

In [None]:
# Mean of every metric for each (Date, Country) pair. Grouping on Date first
# leaves the rows ordered by date, then by country (groupby sorts its keys).
# numeric_only=True: since pandas 2.0, mean() raises TypeError on non-numeric
# columns instead of skipping them; for all-numeric input the result is unchanged.
df3 = (
    df.groupby(['Date', 'Country'])
    .mean(numeric_only=True)
    .reset_index()
)

# Plot Map
This is the link to the Plotly documentation on choropleth maps: https://plotly.com/python/choropleth-maps/

In [None]:
# Animated world map of the retail metric: one animation frame per Date value,
# countries matched to map regions by their names.
fig = px.choropleth(
    df3,
    locations="Country",
    locationmode="country names",
    hover_name="Country",
    color="retail",
    color_continuous_scale=px.colors.diverging.Picnic,
    range_color=(-100, 50),  # fixed bounds for the colour scale
    animation_frame="Date",
)

# Title placed at x=0.5 (horizontal centre); map frame and coastlines hidden.
# update_layout() is the last expression of the cell, so its return value is
# what the notebook displays; fig.show() is intentionally left disabled.
fig.update_layout(
    title_text='Retail ',
    title_x=0.5,
    geo={
        'showframe': False,
        'showcoastlines': False,
    },
)
# fig.show()

# Format dataframe before plotting scatter plot

### Find mean by country using the groupby( ) function

In [None]:
# Mean of the retail metric per country over all rows of df, returned as a
# two-column frame: Country, Mean_Retail.
df_country = (
    df.groupby(['Country'])['retail']
    .mean()
    .reset_index(name='Mean_Retail')
)

### Set datetime type
The DATETIME type is used for values that contain both date and time parts.

It supports a wide variety of time formats such as: 
* __Year__ : [%Y = 2020] OR [%y = 20] (with or without century)
* __Month__ : %m
* __Day__ : %d  

To describe a full date, we combine these format codes with the separators used in the data. 
* __Year - Month - Day__ ==> 2020-06-27 ==> %Y-%m-%d
* __Year / Month / Day__ ==> 20/06/27 ==> %y/%m/%d

Examples of multiple datetime formats and variables can be found here: https://www.w3schools.com/python/python_datetime.asp


In [None]:
# Per-country, per-day mean of every metric, newest dates first.
# numeric_only=True: since pandas 2.0, mean() raises TypeError on non-numeric
# columns instead of skipping them; for all-numeric input the result is unchanged.
df_new = (
    df.groupby(['Country', 'Date'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values('Date', ascending=False)
)

# Parse the YYYY-MM-DD strings into real timestamps and use them as the index,
# which is what date-based truncation needs. Reassigning instead of
# inplace=True keeps the transformation an explicit, chainable step.
df_new['Date'] = pd.to_datetime(df_new['Date'], format='%Y-%m-%d')
df_new = df_new.set_index('Date')

### Truncate the dataframe: keep only dates on or after June 15th, 2020


In [None]:
# truncate() refuses to work on an unsorted index, so order the rows
# chronologically first.
df_new = df_new.sort_index(ascending=True)

# Discard every row dated before 2020-06-15; rows on that date itself are
# kept, and there is no upper bound.
df_truncated = df_new.truncate(before='2020-06-15 00:00:00', after=None)
df_truncated

In [None]:
# Highest retail value each country reached within the truncated date window.
# The previous version called .mean() here even though this cell's own comment
# and the 'Maximum' column name both ask for the maximum.
# The aggregate is stored under its own name instead of overwriting
# df_truncated: overwriting removed the 'retail' column from df_truncated, so
# running the cell a second time failed with a KeyError.
retail_recent_max = (
    df_truncated.groupby(['Country'])['retail']
    .max()
    .reset_index(name='Maximum')
)

# Inner join on Country: one row per country present in both frames, with the
# columns Country, Mean_Retail and Maximum.
merged = df_country.merge(retail_recent_max, on='Country')

# Scatter Plot

In [None]:
# One point per country: overall mean retail change (x) against the
# post-cutoff 'Maximum' column (y).
# `labels` must be keyed by the *column names* passed as x / y; the former keys
# 'x' and 'y' matched no column, so the "Worst" / "Best" axis titles were
# silently ignored. hover_name shows which country each point is, matching the
# choropleth cell. size_max only takes effect when a `size` column is given;
# it is kept as-is.
fig = px.scatter(
    merged,
    x="Mean_Retail",
    y="Maximum",
    labels={'Mean_Retail': 'Worst', 'Maximum': 'Best'},
    hover_name="Country",
    size_max=30,
)
fig.show()