In [56]:
import pandas as pd 

In [57]:
# read in the oecd data
df = pd.read_csv('data/gdpph_oecd.csv')

df = df[['Country','LOCATION', 'Measure','TIME_PERIOD', 'OBS_VALUE']]

df.columns = ['country', 'id', 'measure', 'year', 'value']

# sort by country and year
df = df.sort_values(by=['country', 'year'])

df

Unnamed: 0,country,id,measure,year,value
3,Australia,AUS,US dollars,1990,35.589084
1435,Australia,AUS,2015=100,1990,65.728880
1367,Australia,AUS,US dollars,1991,36.406827
1371,Australia,AUS,2015=100,1991,67.239157
472,Australia,AUS,2015=100,1992,69.306729
...,...,...,...,...,...
1239,United States,USA,2015=100,2020,107.111176
231,United States,USA,US dollars,2021,74.838217
236,United States,USA,2015=100,2021,108.386615
698,United States,USA,2015=100,2022,107.025198


In [58]:
# Pivot the dataframe to create separate columns for each measure
df_pivot = df.pivot_table(index=['country', 'id', 'year'], columns='measure', values='value').reset_index()

# Rename the columns for clarity
df_pivot.columns.name = None
df_pivot.rename(columns={'US dollars': 'value', '2015=100': 'index'}, inplace=True)

df_pivot

Unnamed: 0,country,id,year,index,value
0,Australia,AUS,1990,65.728880,35.589084
1,Australia,AUS,1991,67.239157,36.406827
2,Australia,AUS,1992,69.306729,37.526320
3,Australia,AUS,1993,69.456332,37.607323
4,Australia,AUS,1994,69.609837,37.690439
...,...,...,...,...,...
1184,United States,USA,2018,102.342200,70.664702
1185,United States,USA,2019,103.581713,71.520554
1186,United States,USA,2020,107.111176,73.957559
1187,United States,USA,2021,108.386615,74.838217


In [59]:
df_pivot.dtypes

country     object
id          object
year         int64
index      float64
value      float64
dtype: object

In [60]:
df_pivot[df_pivot['id'] == 'USA']

Unnamed: 0,country,id,year,index,value
1156,United States,USA,1990,66.017332,45.583299
1157,United States,USA,1991,66.882229,46.180488
1158,United States,USA,1992,69.179788,47.766894
1159,United States,USA,1993,69.44318,47.94876
1160,United States,USA,1994,70.03144,48.354938
1161,United States,USA,1995,70.185526,48.461331
1162,United States,USA,1996,71.930383,49.666111
1163,United States,USA,1997,72.97303,50.386032
1164,United States,USA,1998,74.614716,51.519575
1165,United States,USA,1999,76.818796,53.041436


In [61]:
# Change the base year for the index to 1990
base_year = 1990

# Get the base values for the year 1990
base_values_1990 = df_pivot[df_pivot['year'] == base_year].set_index('country')['value']

# Update the index column to reflect the new base year
df_pivot['index'] = df_pivot.apply(lambda row: row['value'] / base_values_1990[row['country']] * 100, axis=1)

df_pivot

KeyError: 'Austria'

In [62]:
# create a version with just g7 countries
g7_countries = ['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States']
g7 = df_pivot[df_pivot['country'].isin(g7_countries)]

g7

Unnamed: 0,country,id,year,index,value
94,Canada,CAN,1990,72.860442,37.329579
95,Canada,CAN,1991,73.501045,37.657788
96,Canada,CAN,1992,75.004661,38.428156
97,Canada,CAN,1993,76.522221,39.205668
98,Canada,CAN,1994,78.043471,39.985071
...,...,...,...,...,...
1184,United States,USA,2018,102.342200,70.664702
1185,United States,USA,2019,103.581713,71.520554
1186,United States,USA,2020,107.111176,73.957559
1187,United States,USA,2021,108.386615,74.838217


In [40]:
# change g7 year to datetime
g7['year'] = pd.to_datetime(g7['year'], format='%Y')

g7

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  g7['year'] = pd.to_datetime(g7['year'], format='%Y')


Unnamed: 0,country,id,year,index,value
94,Canada,CAN,1990-01-01,72.860442,37.329579
95,Canada,CAN,1991-01-01,73.501045,37.657788
96,Canada,CAN,1992-01-01,75.004661,38.428156
97,Canada,CAN,1993-01-01,76.522221,39.205668
98,Canada,CAN,1994-01-01,78.043471,39.985071
...,...,...,...,...,...
1184,United States,USA,2018-01-01,102.342200,70.664702
1185,United States,USA,2019-01-01,103.581713,71.520554
1186,United States,USA,2020-01-01,107.111176,73.957559
1187,United States,USA,2021-01-01,108.386615,74.838217


In [64]:
# change g7 year to datetime
g7['year'] = pd.to_datetime(g7['year'], format='%Y')

g7.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  g7['year'] = pd.to_datetime(g7['year'], format='%Y')


country            object
id                 object
year       datetime64[ns]
index             float64
value             float64
dtype: object

In [65]:
# change g7 index base year to 1990
base_year = 1990

# Get the base values for the year 1990
base_values_1990 = g7[g7['year'] == pd.Timestamp(str(base_year))].set_index('country')['value']

# Update the index column to reflect the new base year
g7['index'] = g7.apply(lambda row: row['value'] / base_values_1990[row['country']] * 100, axis=1)

g7

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  g7['index'] = g7.apply(lambda row: row['value'] / base_values_1990[row['country']] * 100, axis=1)


Unnamed: 0,country,id,year,index,value
94,Canada,CAN,1990-01-01,100.000000,37.329579
95,Canada,CAN,1991-01-01,100.879220,37.657788
96,Canada,CAN,1992-01-01,102.942913,38.428156
97,Canada,CAN,1993-01-01,105.025744,39.205668
98,Canada,CAN,1994-01-01,107.113640,39.985071
...,...,...,...,...,...
1184,United States,USA,2018-01-01,155.023229,70.664702
1185,United States,USA,2019-01-01,156.900785,71.520554
1186,United States,USA,2020-01-01,162.247052,73.957559
1187,United States,USA,2021-01-01,164.179027,74.838217


In [67]:
g7[g7['year'] == pd.Timestamp('1990-01-01')]

Unnamed: 0,country,id,year,index,value
94,Canada,CAN,1990-01-01,100.0,37.329579
344,France,FRA,1990-01-01,100.0,47.626606
377,Germany,DEU,1990-01-01,100.0,45.617388
574,Italy,ITA,1990-01-01,100.0,45.001814
607,Japan,JPN,1990-01-01,100.0,31.293218
1123,United Kingdom,GBR,1990-01-01,100.0,38.416611
1156,United States,USA,1990-01-01,100.0,45.583299


In [69]:
g7.to_csv('data/g7_productivity.csv', index=False)

In [36]:
df.to_csv('data/gdpph_clean.csv', index=False)

In [8]:
import requests

# Define the URL for the OECD API for the UK
url = "https://stats.oecd.org/SDMX-JSON/data/PDB_LV/GBR.GDPHRS/all?startTime=1990&endTime=2022"

# Fetch the data from the API
response = requests.get(url)
data = response.json()

# Inspect the structure of the data dictionary
print(data.keys())

# Extract the relevant data
if 'dataSets' in data:
	uk_data = data['dataSets'][0]['series']['0:0:0:0:0']['observations']
else:
	print("Key 'dataSets' not found in the data dictionary")
	uk_data = {}

# Convert the data to a pandas DataFrame
uk_gdp_per_hour = pd.DataFrame.from_dict(uk_data, orient='index', columns=['value'])
uk_gdp_per_hour['year'] = pd.to_datetime([f"{1990 + int(i)}-01-01" for i in uk_gdp_per_hour.index])
uk_gdp_per_hour.set_index('year', inplace=True)

uk_gdp_per_hour

dict_keys(['meta', 'data', 'errors'])
Key 'dataSets' not found in the data dictionary


Unnamed: 0_level_0,value
year,Unnamed: 1_level_1
