In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

pd.set_option('display.max_columns', None)
plt.rcParams["figure.figsize"] = (20,7)

In [2]:
countries_mapping = {
    "134": "Germany",
    "111": "UnitedStates",
    "132": "France",
    "193": "Australia",
    "156": "Canada",
    "112": "UnitedKingdom",
    "122": "Austria",
    "124": "BelgiumLuxembourg",
    "138": "NetherlandsThe",
    "136": "Italy",
    "124": "Belgium",
    "128": "Denmark",
    "142": "Norway",
    "144": "Sweden",
    "146": "Switzerland",
    "158": "Japan",
    "174": "Greece",
    "172": "Finland",
    "184": "Spain",
    "178": "Ireland",
    "182": "Portugal"
}

list_countries = ["Germany", "UnitedStates", "France", "Australia", "Canada", "UnitedKingdom", 
 "Austria", "Italy", 
 "Denmark", "Norway", "Sweden", "Switzerland", "Japan", "Greece", 
 "Finland", "Spain", "Ireland", "Portugal"]

In [8]:
def clean_trade_dataset(df):
    """
    Takes the df as provided by the imf and returns it as a clean and usable dataframe
    """
    df_transposed = df.iloc[5:,1:].T
    df_transposed.columns = df_transposed.iloc[0]
    df_transposed = df_transposed.iloc[1:, :]
    df_transposed.rename(columns={np.nan: "Year"}, inplace=True)
    df_transposed["Year"] = df_transposed["Year"].astype("int")
    df_transposed = df_transposed.set_index("Year")
    return df_transposed

def process_nominal_GDP(df):
    df = df.set_index("Year")
    df = df.replace("...", np.nan)
    for column in df.columns:
        df = df.astype({column:'float'})
    df = df.rename(columns={
        "United States": "UnitedStates",
        "United Kingdom": "UnitedKingdom",
    })
    return df.loc[1960:]

def process_real_GDP(df):
    df.columns = df.loc["Country"]
    df = (
        df
        .iloc[3:, ]
        .replace("...", np.nan)
        .rename(columns={
            "New Zealand": "NewZealand",
            "Netherlands, The": 'NetherlandsThe',
            "United States": "UnitedStates",
            "United Kingdom": "UnitedKingdom",
        })
    )

    for column in df.columns:
        df[column] = df[column].str.replace(" ","")
        df = df.astype({column:'float'})
    
    df = df.rename(columns={"Country": "date"})
    df["year"] = df.index.str[:4].astype(int)
    df["quarter"] = df.index.str[4:]
    df["month"] = df["quarter"].replace({"Q1": 1, "Q2": 4, "Q3": 7, "Q4": 10})
    df['date'] = pd.to_datetime(dict(year=df["year"], month=df["month"], day=1))
    df = df.set_index("date")
    return df

def get_trade_activity(country_i, country_j):
    """
    Takes 2 countries as input
    Returns a dataframe with trade activity between these two countries
    X_i_j: export from country i to j
    X_i: total global export from country i
    M_i_j: import of country i from j
    M_i: total global import of country i
    Y_i: GDP of country i
    """
    X_i_j = dict_export[country_i][[country_j]].rename(columns={country_j: f"Export from {country_i} to {country_j}"})
    X_i = dict_export[country_i][["World"]].rename(columns={"World": f"Global Export from {country_i} to world"})
    X_j = dict_export[country_j][["World"]].rename(columns={"World": f"Global Export from {country_j} to world"})
    M_i_j = dict_import[country_i][[country_j]].rename(columns={country_j: f"Import of {country_i} from {country_j}"})
    M_i = dict_import[country_i][["World"]].rename(columns={"World": f"Global Import of {country_i}"})
    M_j = dict_import[country_j][["World"]].rename(columns={"World": f"Global Import of {country_j}"})
    Y_real_i = real_GDP_yearly[[country_i]].rename(columns={country_i: f"GDP of {country_i}"})
    Y_real_j = real_GDP_yearly[[country_j]].rename(columns={country_j: f"GDP of {country_j}"})
    Y_nominal_i = nominalGDP[[country_i]].rename(columns={country_i: f"GDP of {country_i}"})
    Y_nominal_j = nominalGDP[[country_j]].rename(columns={country_j: f"GDP of {country_j}"})
    W_i_j = pd.concat([X_i_j, X_i, M_i_j, M_i, X_j, M_j, Y_real_i, Y_real_j,Y_nominal_i, Y_nominal_j], axis=1)
    W_i_j["wt: Trade Intensity by bilateral trade"] = (X_i_j.values + M_i_j.values) / (X_i.values + X_j.values + M_i.values + M_j.values)
    W_i_j["wy_real: Trade Intensity by real GDP"] = (X_i_j.values + M_i_j.values) / (Y_real_i.values + Y_real_j.values)
    W_i_j["wy_nominal: Trade Intensity by nominal GDP"] = (X_i_j.values + M_i_j.values) / (Y_nominal_i.values + Y_nominal_j.values)
    W_i_j["wm"] = M_i_j.values / (M_i.values + M_j.values)
    W_i_j["wx"] = X_i_j.values / (X_i.values + X_j.values)
    for column in W_i_j.columns:
        W_i_j[column] = W_i_j[column].astype(float)
    return W_i_j

def get_average_period(df_trade_activity, date1, date2, country_i, country_j):
    return (np.abs(pd.DataFrame(
                df_trade_activity[country_i][country_j][["wt: Trade Intensity by bilateral trade",
                                                         "wy_real: Trade Intensity by real GDP",
                                                         "wy_nominal: Trade Intensity by nominal GDP",
                                                         "wm",
                                                         "wx"]]\
                    .loc[date1:date2].mean()))).T

def process_concept_data(df):
    df = df[["Value", "TIME", "Country"]]
    df["Value"] = np.log(df["Value"])
    df["Year"] = df["TIME"].astype(str).str[:4]
    df["Quarter"] = df["TIME"].astype(str).str[-1].astype(int)
    df["Month"] = (df["Quarter"]-1)*3+1
    df['Date'] = pd.to_datetime(dict(year=df["Year"], month=df["Month"], day=1))
    return df

In [4]:
path_nominal_GDP = "data_StatApp/imf/NominalGDP.xls"
xls_nominalGDP = pd.ExcelFile(path_nominal_GDP)
nominalGDP = pd.read_excel(xls_nominalGDP, 'Sheet1')

path_real_GDP = "/Users/victorgraff/Documents/2022:2023 ENSAE/Cours/Statapps/data_StatApp/imf/real_GDP_corrected.csv"
real_GDP = pd.read_csv(path_real_GDP, on_bad_lines='skip').T

In [5]:
nominalGDP = process_nominal_GDP(nominalGDP)
real_GDP = process_real_GDP(real_GDP)
real_GDP_yearly = real_GDP.groupby("year").sum()

real_GDP_yearly = real_GDP_yearly.loc[1960:2020]

real_GDP_yearly = real_GDP_yearly.replace(0.0, np.nan)

real_GDP = real_GDP[list_countries]

real_GDP_yearly.head()

Country,Argentina,Australia,Austria,Belgium,Brazil,Bulgaria,Canada,Chile,"China, P.R.: Hong Kong",Colombia,Costa Rica,"Croatia, Rep. of",Cyprus,Czech Rep.,Denmark,Ecuador,El Salvador,"Estonia, Rep. of",Euro Area,Finland,France,Germany,Greece,Honduras,Hungary,India,Indonesia,Ireland,Israel,Italy,Japan,Jordan,"Korea, Rep. of",Latvia,Lithuania,Luxembourg,Malta,Mexico,"Moldova, Rep. of",NetherlandsThe,NewZealand,Norway,Philippines,"Poland, Rep. of",Portugal,Romania,Russian Federation,Saudi Arabia,"Serbia, Rep. of",Singapore,Slovak Rep.,"Slovenia, Rep. of",South Africa,Spain,Sweden,Switzerland,Thailand,Turkey,Ukraine,UnitedKingdom,UnitedStates,month
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1
1960,,277471.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,29070100.0,,,,,,,,,,,,,,,,,,,,,,,,,,,576144.0,3262061.2,22
1961,,277569.0,,,,,355591.9,,,,,,,,,,,,,,,,,,,,,,,,,,31086500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,591529.0,3345690.3,22
1962,,292573.0,,,,,381995.9,,,,,,,,,,,,,,,,,,,,,,,,,,32297200.0,,,,,,,,,,,,,,,,,,,,,,,,,,,597893.0,3550683.8,22
1963,,310885.0,,,,,402430.9,,,,,,,,,,,,,,,,,,,,,,,,,,35210600.0,,,,,,,,,,,,,,,,,,,,,,,,,,,626797.0,3705317.8,22
1964,,329918.0,,,,,429163.9,,,,,,,,,,,,,,,,,,,,,,,,,,38546500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,662544.0,3918790.9,22


## Trade activity

#### For each country, aggregation of import and export with all other countries available

In [6]:

dict_paths = {}
dict_xls = {}
# dict export contains a set per country, representing the exports from this country to partners
dict_export = {}
# dict import contains a set per country, representing the imports of this country from partners
dict_import = {}

for country in list_countries:
    dict_paths[country] = f"data_StatApp/imf/trade/{country}.xls"
    dict_xls[country] = pd.ExcelFile(dict_paths[country])
    dict_export[country] = pd.read_excel(dict_xls[country], dict_xls[country].sheet_names[0], index_col=None)
    dict_export[country] = clean_trade_dataset(dict_export[country])\
                            .rename(columns={
        "United States": "UnitedStates",
        "United Kingdom": "UnitedKingdom",
    })
    dict_export[country] = dict_export[country].T.drop_duplicates().T
    dict_import[country] = pd.read_excel(dict_xls[country], dict_xls[country].sheet_names[1], index_col=None)
    dict_import[country] = clean_trade_dataset(dict_import[country])\
                            .rename(columns={
        "United States": "UnitedStates",
        "United Kingdom": "UnitedKingdom",
    })
    dict_import[country] = dict_import[country].T.drop_duplicates().T
    for column in dict_import[country].columns:
        dict_import[country][column] = dict_import[country][column].astype("float")
        dict_import[country][column] = dict_import[country][column].round(2)




#### For each pair of countries, aggregate all the trade activity informations

In [None]:
trade_activity_countries = {}
for country_i in list_countries:
    trade_activity_countries[country_i] = {}
    for country_j in list_countries:
        if country_i != country_j:
            trade_activity_countries[country_i][country_j] = get_trade_activity(country_i, country_j)


In [14]:
trade_activity_countries["France"]["Germany"].head()

Unnamed: 0,Export from France to Germany,Global Export from France to world,Import of France from Germany,Global Import of France,Global Export from Germany to world,Global Import of Germany,GDP of France,GDP of Germany,GDP of France.1,GDP of Germany.1,wt: Trade Intensity by bilateral trade,wy_real: Trade Intensity by real GDP,wy_nominal: Trade Intensity by nominal GDP,wm,wx
1960,943.2,6753.8,990.3,6065.6,11381.0,10013.0,,,46834.0,,0.056513,,,0.061591,0.05201
1961,1095.6,7099.5,1139.9,6488.2,12648.0,10882.0,,,50775.0,,0.060227,,,0.065624,0.05548
1962,1271.9,7225.5,1324.2,7315.2,13235.0,12240.0,,,56906.0,,0.064877,,,0.067716,0.062164
1963,1341.5,7926.6,1572.2,8523.9,14587.0,12952.0,,,63794.0,,0.066236,,,0.073208,0.059586
1964,1565.4,8804.3,1846.5,9899.2,16176.0,14635.0,,,70755.0,,0.068907,,,0.075262,0.062665


#### Aggregate all trading information in the same format as Frankel Rose data

In [12]:
dy_df = pd.DataFrame(columns=["country_1", "country_2", "period"])
trade_activity_countries_split = {}

for country_i in list_countries:
    trade_activity_countries_split[country_i] = {}
    for country_j in list_countries:
        if country_i != country_j:
            for (period, date1, date2) in [(1, 1960, 1967), (2, 1967, 1976), (3,1976, 1985), (4, 1985, 1994)]:
                df_countries_ij = pd.DataFrame(data={"country_1": [country_i], "country_2": [country_j], "period": [period]})
                df_countries_ij[["wt", "wy_real", "wy_nominal", "wm", "wx"]] = get_average_period(trade_activity_countries, date1, date2, country_i, country_j)[["wt: Trade Intensity by bilateral trade", "wy_real: Trade Intensity by real GDP", "wy_nominal: Trade Intensity by nominal GDP", "wm", "wx"]]
                dy_df = pd.concat([dy_df, df_countries_ij])


In [15]:
dy_df.head()

Unnamed: 0,country_1,country_2,period,wt,wy_real,wy_nominal,wm,wx
0,Germany,UnitedStates,1,0.041798,,,0.055624,0.030208
0,Germany,UnitedStates,2,0.038079,,0.004999,0.037855,0.038372
0,Germany,UnitedStates,3,0.030046,,0.005816,0.027298,0.033491
0,Germany,UnitedStates,4,0.031762,0.00459,0.006596,0.024993,0.039821
0,Germany,France,1,0.067779,,,0.066885,0.068684


### Comparison with Frankel Rose data

In [16]:
FR_data_path = "/Users/victorgraff/Documents/2022:2023 ENSAE/Cours/Statapps/data_StatApp/Data_FrankelRose.csv"
FR_data = pd.read_csv(FR_data_path)

In [17]:
FR_data["count1"] = FR_data["count1"].astype(str)
FR_data["count2"] = FR_data["count2"].astype(str)
FR_data["count1"] = FR_data["count1"].replace(countries_mapping)
FR_data["count2"] = FR_data["count2"].replace(countries_mapping)
FR_data = FR_data.rename(columns={"count1": "country_1", "count2": "country_2"})

In [18]:
FR_data.head()

Unnamed: 0,period,country_1,country_2,dy,di,de,du,ty,ti,te,tu,hy,hi,he,hu,sy,si,se,su,oy,oi,oe,ou,oilpxmy,doilpxmy,wx,wm,wt,c1pop,c2pop,c1rgdpch,c2rgdpch,lwt,lwx,lwm,dlwt,ddy,ddi,dde,ddu,dty,dti,dte,dtu,dhy,dhi,dhe,dhu,dsy,dsi,dse,dsu
0,1,UnitedStates,UnitedKingdom,-0.085669,0.070814,-0.102476,-0.073445,0.434204,0.403422,-0.70902,0.831643,-0.0921,-0.043734,0.268915,0.010287,0.202187,0.040077,0.234825,0.004616,-0.127651,-0.205753,0.596627,0.770944,0.000554,5.6e-05,0.063912,0.061481,0.06275,188532.7,53617.91,10834.69,7267.1709,-2.768591,-2.750245,-2.78903,,,,,,,,,,,,,,,,,
1,2,UnitedStates,UnitedKingdom,0.65181,0.618985,0.485638,0.45,0.560324,0.788419,0.519007,-0.066226,0.743788,0.739684,0.552337,0.561581,0.775855,0.74435,0.530727,0.559785,0.609409,0.714449,0.496574,0.292697,0.085774,1.120762,0.055754,0.049371,0.052393,208734.0,55852.711,13485.14,8893.9141,-2.948975,-2.886801,-3.008402,-0.180384,0.737479,0.548171,0.588114,0.523445,0.12612,0.384996,1.228027,-0.897869,0.835888,0.783419,0.283421,0.551294,0.573669,0.704273,0.295902,0.555169
2,3,UnitedStates,UnitedKingdom,0.489485,0.38034,0.731714,0.537802,0.793406,0.608886,0.812848,0.733142,0.574553,0.386985,0.85277,0.651506,0.63167,0.431305,0.799628,0.648825,0.565606,0.474865,0.923205,0.707039,-0.069624,0.003535,0.058016,0.050574,0.053977,228449.3,56304.859,15362.97,10267.63,-2.919197,-2.847043,-2.98432,0.029777,-0.162325,-0.238645,0.246076,0.087801,0.233081,-0.179532,0.293841,0.799368,-0.169235,-0.352699,0.300434,0.089924,-0.144185,-0.313045,0.268902,0.08904
3,4,UnitedStates,UnitedKingdom,0.735773,0.690864,0.795698,0.797915,0.739944,0.915494,0.60376,0.659663,0.761292,0.837529,0.703515,0.695373,0.790715,0.824823,0.571224,0.697585,0.780641,0.93302,0.804186,0.706813,-0.014253,-0.004809,0.057168,0.042809,0.049051,248299.91,57271.828,17577.66,12586.26,-3.014903,-2.861754,-3.151005,-0.095706,0.246288,0.310525,0.063984,0.260113,-0.053462,0.306608,-0.209088,-0.073479,0.18674,0.450543,-0.149256,0.043867,0.159045,0.393518,-0.228404,0.04876
4,1,UnitedStates,Austria,0.536507,-0.407823,-0.673405,-0.547397,-0.352058,-0.579379,-0.870111,-0.309694,-0.098499,-0.376418,-0.438464,-0.518989,-0.031084,-0.132819,-0.23547,-0.518574,0.069868,-0.413708,0.319772,-0.047467,0.000174,1.9e-05,0.003921,0.004422,0.003986,188532.7,7171.0288,10834.69,5688.1138,-5.524892,-5.541332,-5.421095,,,,,,,,,,,,,,,,,


#### Get correlation of data trends between processed data and Frankel Rose data

In [19]:
countries_corr = {}
for w in ["wt", "wm", "wx"]:
    countries_corr[str(w)] = dict()
    for i in range(len(list_countries)-1):
        country1 = list_countries[i]
        for j in range(i+1, len(list_countries)):
            country2 = list_countries[j]
            fr_df = FR_data[(FR_data["country_1"] == country1) & (FR_data["country_2"] == country2)][[w]]
            if fr_df.empty:
                fr_df = FR_data[(FR_data["country_1"] == country2) & (FR_data["country_2"] == country1)][[w]]
            dy_df_w = dy_df[(dy_df["country_1"] == country1) & (dy_df["country_2"] == country2)][[w]]
            countries_corr[str(w)][country1 + "_" + country2] = np.corrcoef(fr_df.values[:, 0], dy_df_w.values[:, 0])[0, 1]


#### average correlation between Frankel Rose data and processed data over all pairs of countries

In [20]:
print(f"average correlation on wt Trade Intensity by bilateral trade: {np.mean(list(countries_corr['wt'].values()))}")
print(f"average correlation on wm Trade Intensity by import: {np.mean(list(countries_corr['wx'].values()))}")
print(f"average correlation on wx Trade Intensity by export: {np.mean(list(countries_corr['wm'].values()))}")



average correlation on wt Trade Intensity by bilateral trade: 0.893264333774569
average correlation on wm Trade Intensity by import: 0.775888302158997
average correlation on wx Trade Intensity by export: 0.7766652026210833


In [21]:
countries_corr['wt']

{'Germany_UnitedStates': 0.9770786911939698,
 'Germany_France': 0.28844162907172766,
 'Germany_Australia': 0.9961080233099714,
 'Germany_Canada': 0.9969687660781941,
 'Germany_UnitedKingdom': 0.9984200735230419,
 'Germany_Austria': 0.9751662504169661,
 'Germany_Italy': 0.9093979465068153,
 'Germany_Denmark': 0.9897690412391955,
 'Germany_Norway': 0.9950249546535136,
 'Germany_Sweden': 0.9980070529260374,
 'Germany_Switzerland': 0.9515159745443632,
 'Germany_Japan': 0.9928532215946907,
 'Germany_Greece': 0.9673784511104396,
 'Germany_Finland': 0.9897371809450447,
 'Germany_Spain': 0.9976402667411836,
 'Germany_Ireland': 0.9953419696357155,
 'Germany_Portugal': 0.9976457645348246,
 'UnitedStates_France': 0.8819816919494773,
 'UnitedStates_Australia': 0.9035129518663678,
 'UnitedStates_Canada': 0.9649450793994412,
 'UnitedStates_UnitedKingdom': 0.8852938451445888,
 'UnitedStates_Austria': 0.9670053515023957,
 'UnitedStates_Italy': 0.9751633916232048,
 'UnitedStates_Denmark': 0.98641379014

## Analysis of economic concept data

### Employment data

In [23]:
employment_data_path = "/Users/victorgraff/Documents/2022:2023 ENSAE/Cours/Statapps/data_StatApp/oecd/Employment.csv"
employment = pd.read_csv(employment_data_path)
employment.head(2)

Unnamed: 0,LOCATION,Country,SUBJECT,Subject,MEASURE,Measure,FREQUENCY,Frequency,TIME,Time,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,AUS,Australia,LFEMTTFE,"Employed population, Aged 15 and over, Females",STSA,"Level, rate or quantity series, s.a.",Q,Quarterly,1965-Q1,Q1-1965,PER,Persons,3,Thousands,,,1365.406,,
1,AUS,Australia,LFEMTTFE,"Employed population, Aged 15 and over, Females",STSA,"Level, rate or quantity series, s.a.",Q,Quarterly,1965-Q2,Q2-1965,PER,Persons,3,Thousands,,,1384.917,,


In [27]:
employment = process_concept_data(employment)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Value"] = np.log(df["Value"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Year"] = df["TIME"].astype(str).str[:4]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Quarter"] = df["TIME"].astype(str).str[-1].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
T

### Countries with data strating in 1959

In [30]:
min_available_year = pd.DataFrame(employment.groupby("Country")["Year"].min())
min_available_year[min_available_year["Year"].astype(int) <= 1960]

Unnamed: 0_level_0,Year
Country,Unnamed: 1_level_1
Canada,1959
Japan,1959
United States,1959


### Unemployment data

In [31]:
path_unemployment = "/Users/victorgraff/Documents/2022:2023 ENSAE/Cours/Statapps/data_StatApp/oecd/UnemployedPopulation.csv"
unemploy = pd.read_csv(path_unemployment)

In [32]:
unemploy = process_concept_data(unemploy)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Value"] = np.log(df["Value"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Year"] = df["TIME"].astype(str).str[:4]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Quarter"] = df["T

### Countries with data strating in 1959

In [33]:
min_available_year_unemp = pd.DataFrame(unemploy.groupby("Country")["Year"].min())
min_available_year_unemp[min_available_year_unemp["Year"].astype(int) <= 1959]

Unnamed: 0_level_0,Year
Country,Unnamed: 1_level_1
Canada,1959
France,1959
Japan,1959
United States,1959


In [34]:
unemploy.head()

Unnamed: 0,Value,TIME,Country,Year,Quarter,Month,Date
0,4.641888,1978-Q2,Australia,1978,2,4,1978-04-01
1,4.66356,1978-Q3,Australia,1978,3,7,1978-07-01
2,4.705699,1978-Q4,Australia,1978,4,10,1978-10-01
3,4.710126,1979-Q1,Australia,1979,1,1,1979-01-01
4,4.755773,1979-Q2,Australia,1979,2,4,1979-04-01


In [36]:
set_countries = dict()
for country in unemploy["Country"].unique():
    set_countries[country] = unemploy[unemploy["Country"] == country]
    set_countries[country]["de"] = set_countries[country]["Value"].diff(4)
    set_countries[country]["se"] = sm.tsa.filters.hpfilter(set_countries[country]["Value"], 1600)[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  set_countries[country]["de"] = set_countries[country]["Value"].diff(4)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  set_countries[country]["se"] = sm.tsa.filters.hpfilter(set_countries[country]["Value"], 1600)[0]


In [38]:
set_countries["UnitedStates"] = set_countries.pop("United States")
set_countries["UnitedKingdom"] = set_countries.pop("United Kingdom")