In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from bubbly.bubbly import bubbleplot 
from plotly.offline import iplot
import warnings
warnings.filterwarnings('ignore')

In [2]:
df_temp = pd.read_csv('top_temp_diff.csv') 
df_inc = pd.read_csv('top_income_diff.csv') 
df_temp.info()
df_inc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   index       121 non-null    int64  
 1   Region      121 non-null    object 
 2   Country     121 non-null    object 
 3   AvgNew      121 non-null    float64
 4   AvgLast     121 non-null    float64
 5   difference  121 non-null    float64
dtypes: float64(3), int64(1), object(2)
memory usage: 5.8+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Country                  262 non-null    object 
 1   GDP_per_capita_USD_New   262 non-null    float64
 2   GDP_per_capita_USD_Last  262 non-null    float64
 3   difference               262 non-null    float64
dtypes: float64(3), object(1)
memory usage: 8.3+ KB


In [3]:
df_temp=df_temp.sort_values(by=['Region','Country','Year']).reset_index().drop('index', axis=1)
df_temp

KeyError: 'Year'

In [None]:
df_inc=df_inc.rename(columns={"Country Name": "Country"})
df_inc

In [None]:
df = pd.merge(df_temp, df_inc, how ='inner', on =['Country', 'Year'])
df

In [None]:
figure = bubbleplot(dataset=df, x_column='GDP_per_capita_USD', y_column='AvgTemperature', 
    bubble_column='Country', time_column='Year', size_column='GDP_per_capita_USD', color_column='Region', 
    x_title="GDP per Capita", y_title="Average Temperature", title='Avg Temp vs GDP',
    x_logscale=True, scale_bubble=3, height=650)

iplot(figure, config={'scrollzoom': True})

In [None]:
# global yearly stats:
# - average, min, max temperature per year 
# - date and location (city/country/region) of lowest temperature during this year
# - date and location (city/country/region) of highest temperature during this year
dfg = (df.groupby('Year')['AvgTemperature'].agg(['mean','min','idxmin','max','idxmax']).reset_index()
       .merge(df[['Region','Country']], left_on='idxmin',right_index=True)
       .merge(df[['Region','Country']], left_on='idxmax',right_index=True,suffixes=('_min','_max')) )

# top hottest/coldest cities over the entire period
dft = df.groupby(['Region','Country'])['AvgTemperature'].mean().sort_values(ascending=False).reset_index()
dfi = df.groupby(['Region','Country'])['GDP_per_capita_USD'].mean().sort_values(ascending=False).reset_index()

fig = make_subplots(rows=3,cols=2,
                    column_widths=[0.5, 0.5],row_heights=[0.5, 0.5, 0.5],
                    vertical_spacing=0.15,
                    specs=[[{"type": "scatter", "colspan": 2},None],
                           [{"type": "bar"},{"type": "bar"}],
                           [{"type": "bar"},{"type": "bar"}]],
                    subplot_titles=['Global temperatures over the years 1995 to 2019','Top 20 coldest countries','Top 20 hottest countries',
                                   'Top 20 highest income', 'Top 20 lowest income'],
                    y_title='Average temperature °C')

# global temperature trend graph
trace = (px.scatter(dfg, x='Year', y='mean',trendline='ols',trendline_color_override='red')
         .add_trace(px.line(dfg, x='Year', y='mean').data[0])
         .update_traces(hovertemplate='<b>%{x}</b><br><i>Avg temp :<b> %{y}</b></i><br>%{text}',
                        text = ['Min temp : <b>'+str(d['min'])+'</b>, country : '+d['Country_min'] +'<br>'+'Max temp : <b>'+str(d['max'])+'</b>, country : '+d['Country_max']
                                for _, d in dfg.iterrows()],
                        hoverlabel_bgcolor='white')).data

fig.add_trace(trace[0], row=1, col=1)
fig.add_trace(trace[1], row=1, col=1)
fig.add_trace(trace[2], row=1, col=1)

# hottest cities graph
fig.add_trace((px.bar(dft.head(20),
                      x='Country',
                      y='AvgTemperature',
                      color='AvgTemperature',
                      color_continuous_scale=['darkorange','red'],
                      hover_data=['Country', 'AvgTemperature'],
                      opacity=0.8)).data[0],
              row=2, col=2)

# coldest cities graph
fig.add_trace((px.bar(dft.tail(20),
                      x='Country',
                      y='AvgTemperature',
                      color='AvgTemperature',
                      color_continuous_scale=['blue','lightblue'],
                      hover_data=['Country', 'AvgTemperature'],
                      opacity=0.8)).data[0],
              row=2, col=1)

# highest income graph
fig.add_trace((px.bar(dfi.head(20),
                      x='Country',
                      y='GDP_per_capita_USD',
                      color='GDP_per_capita_USD',
                      color_continuous_scale=['darkorange','red'],
                      hover_data=['Country', 'GDP_per_capita_USD'],
                      opacity=0.8)).data[0],
              row=3, col=1)

# lowest income graph
fig.add_trace((px.bar(dfi.tail(20),
                      x='Country',
                      y='GDP_per_capita_USD',
                      color='GDP_per_capita_USD',
                      color_continuous_scale=['blue','lightblue'],
                      hover_data=['Country', 'GDP_per_capita_USD'],
                      opacity=0.8)).data[0],
              row=3, col=2)

fig.update_layout(height=1100, margin=dict(r=10, t=40, b=50, l=60))
fig.update_layout(coloraxis_autocolorscale=False, coloraxis_colorscale=['blue','lightblue','yellow','orange','darkorange','red'],coloraxis_colorbar_title='Temp °C')