In [1]:
import pandas as pd

In [2]:
# Load the land temperature records (by country) from the CSV export
csv_path1 = "GlobalLandTemperaturesByCountry.csv"
df = pd.read_csv(filepath_or_buffer=csv_path1)
# Preview the first five rows
df.head(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
# Add a 'datetime' column holding the 'dt' values parsed from object to datetime
df = df.assign(datetime=pd.to_datetime(df['dt']))
# Show the column dtypes to confirm the conversion
df.dtypes

dt                                       object
AverageTemperature                      float64
AverageTemperatureUncertainty           float64
Country                                  object
datetime                         datetime64[ns]
dtype: object

In [4]:
# Add a 'Year' column: the yearly period that each 'datetime' value falls in
year_of_record = df["datetime"].dt.to_period('Y')
df = df.assign(Year=year_of_record)
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,datetime,Year
0,1743-11-01,4.384,2.294,Åland,1743-11-01,1743
1,1743-12-01,,,Åland,1743-12-01,1743
2,1744-01-01,,,Åland,1744-01-01,1744
3,1744-02-01,,,Åland,1744-02-01,1744
4,1744-03-01,,,Åland,1744-03-01,1744


In [5]:
# Keep only rows from 1990 onward so the range matches the carbon emissions dataset
is_1990_or_later = df["Year"] >= "1990"
df = df.loc[is_1990_or_later]
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,datetime,Year
2954,1990-01-01,0.515,0.359,Åland,1990-01-01,1990
2955,1990-02-01,3.35,0.629,Åland,1990-02-01,1990
2956,1990-03-01,3.134,0.231,Åland,1990-03-01,1990
2957,1990-04-01,4.846,0.255,Åland,1990-04-01,1990
2958,1990-05-01,8.708,0.378,Åland,1990-05-01,1990


In [6]:
# Get each country's average annual land temperature using groupby.
# The columns to average are selected explicitly: the frame still carries the
# string-typed 'dt' column and the 'datetime' column, and pandas >= 2.0 no
# longer silently drops non-numeric columns from .mean() (it raises TypeError).
temperature_columns = ["AverageTemperature", "AverageTemperatureUncertainty"]
df = df.groupby(["Country", "Year"])[temperature_columns].mean().reset_index()
df.head()

Unnamed: 0,Country,Year,AverageTemperature,AverageTemperatureUncertainty
0,Afghanistan,1990,14.993333,0.362417
1,Afghanistan,1991,14.37075,0.411
2,Afghanistan,1992,14.056083,0.501667
3,Afghanistan,1993,14.43925,0.53925
4,Afghanistan,1994,14.75475,0.396833


In [7]:
# Reshape to wide form: one row per country, one column per year
df = (
    df.pivot(index='Country', columns='Year', values='AverageTemperature')
    .reset_index()                # move Country from the index back into a column
    .rename_axis(None, axis=1)    # clear the 'Year' label left on the columns axis
    .dropna()                     # drop countries that have a missing value in any year
)

# Round the yearly values to 2 decimal places; Country is set aside during
# rounding and then put back as the first column
df_country = df['Country']
df = df.drop(columns='Country').round(2)
df.insert(0, 'Country', df_country)
df

Unnamed: 0,Country,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Afghanistan,14.99,14.37,14.06,14.44,14.75,14.86,14.43,14.90,15.13,...,15.77,14.98,15.60,15.11,15.22,15.26,15.83,15.52,14.48,16.53
1,Africa,24.70,24.42,24.34,24.54,24.44,24.72,24.63,24.71,25.02,...,24.90,25.16,24.82,24.85,24.75,25.03,25.47,24.79,24.73,25.21
2,Albania,13.47,12.34,13.08,13.12,14.10,12.78,12.74,12.90,13.31,...,13.26,12.75,12.98,13.89,13.96,13.84,13.78,13.44,13.77,14.99
3,Algeria,24.01,23.01,22.77,23.45,23.64,23.72,23.73,23.96,23.92,...,23.92,24.22,24.20,24.07,23.95,24.15,25.22,24.14,23.95,25.12
4,American Samoa,27.16,27.17,27.07,26.82,26.99,27.27,27.11,26.96,27.42,...,27.28,27.37,27.08,27.45,27.00,27.03,27.45,27.01,27.20,27.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,Western Sahara,23.40,22.64,22.73,22.30,22.94,23.37,23.26,23.64,23.69,...,23.42,23.51,23.44,23.15,23.32,23.38,24.11,23.40,23.30,23.74
239,Yemen,26.64,26.59,26.27,26.40,26.99,26.58,26.55,26.94,27.37,...,27.33,27.29,27.18,27.31,26.81,27.34,27.30,27.29,27.45,28.13
240,Zambia,22.22,21.63,22.41,21.71,21.85,22.45,22.02,21.95,22.30,...,21.77,22.81,21.78,21.84,21.54,21.67,22.27,21.77,21.70,21.20
241,Zimbabwe,22.13,21.79,22.84,21.85,21.61,22.50,21.55,21.57,22.14,...,21.33,22.51,21.62,21.60,21.55,21.38,21.99,21.60,21.52,20.71


In [9]:
# Write the cleaned table to disk as JSON, one record (object) per row
df.to_json(path_or_buf='clean_land_temps.json', orient='records')