In [1]:
import pandas as pd

## CPI 

In [25]:
cpi_df = pd.read_csv('cpi.csv')

cpi_df

#### Remove columns

In [26]:
cpi_df = cpi_df.drop(["Time", "Forecast", "Previous"], axis=1)
cpi_df

Unnamed: 0,Release Date,Actual
0,"Mar 10, 2024 (Feb)",35.70%
1,"Feb 08, 2024 (Jan)",29.80%
2,"Jan 10, 2024 (Dec)",33.70%
3,"Dec 10, 2023 (Nov)",34.60%
4,"Nov 11, 2023 (Oct)",35.80%
...,...,...
401,"May 01, 1990 (Apr)",10.50%
402,"Apr 01, 1990 (Mar)",18.32%
403,"Mar 01, 1990 (Feb)",21.45%
404,"Feb 01, 1990 (Jan)",26.34%


#### Extract just the year from the dataframe

In [27]:
cpi_df['Release Date'].dtype

dtype('O')

In [28]:
# Remove the extra part in the date strings and whitespace
cpi_df['Release Date'] = cpi_df['Release Date'].str.split('(', expand=True)[0].str.strip()
cpi_df['Date'] = pd.to_datetime(cpi_df['Release Date'], format='%b %d, %Y', errors='coerce')

## Remove ReleaseDate 
cpi_df.drop(['Release Date'], axis=1, inplace= True)
## modify the order of the columns
date_column = cpi_df.pop('Date')  
cpi_df.insert(cpi_df.columns.get_loc('Actual'), 'Date', date_column)

cpi_df.head()

Unnamed: 0,Date,Actual
0,2024-03-10,35.70%
1,2024-02-08,29.80%
2,2024-01-10,33.70%
3,2023-12-10,34.60%
4,2023-11-11,35.80%


In [29]:
# Check if there is empty cells
empty_cells_exist = cpi_df.isna().any().any()

if empty_cells_exist:
    print("there is empty cells")
else :
    print("There's no empty cells")

There's no empty cells


#### Split Date into year, month, day columns

In [30]:
cpi_df['year'] = cpi_df['Date'].dt.year
cpi_df['month'] = cpi_df['Date'].dt.month
cpi_df['day'] = cpi_df['Date'].dt.day

In [31]:
cpi_df = cpi_df.rename(columns={'Actual': 'cpi_rate'})


In [32]:
selected_col = cpi_df[['year', 'month', 'day', 'cpi_rate']]
# reverse order 
selected_col = selected_col[::-1]
selected_col.head()

Unnamed: 0,year,month,day,cpi_rate
405,1990,1,1,28.47%
404,1990,2,1,26.34%
403,1990,3,1,21.45%
402,1990,4,1,18.32%
401,1990,5,1,10.50%


#### Save it to new csv

In [33]:
selected_col.to_csv('inflationIndicatorsUpdates/cpi_data.csv', index=False)

## CPI base 2010-100

In [2]:
import csv

# Define the range of years from 1960 to 2022
years = list(range(1960, 2023))

# Split the given data by whitespace and convert it into a list of strings
cpi_values = [1.20872070863002, 1.21710447711338, 1.18055389751081, 1.1893684525188, 1.23291102876457, 1.4158627513308, 1.54380634865603, 1.55464228658054, 1.52859022194483, 1.58071240136806, 1.64019300612286, 1.6917019834377, 1.72726770586276, 1.81556880983419, 1.99756719633179, 2.19072586125566, 2.41675215932171, 2.72445697854963, 3.02627505751449, 3.32600824930488, 4.01845738799857, 4.43305305816039, 5.09016490527709, 5.90865700249978, 6.91527745758085, 7.75249323023021, 9.60257068310286, 11.4936619011958, 13.5238432796011, 16.3992648629815, 19.1471871327422, 22.9284779653364, 26.0553317615891, 29.2053672512273, 31.5868404447804, 36.5593136759471, 39.186869460822, 40.9994995628414, 42.5872361233875, 43.898709687758, 45.0768656084691, 46.1000010132971, 47.361868012585, 49.496835104023, 55.0754349700109, 57.7572765309499, 62.1725468094228, 67.9663872091587, 80.4156759553908, 89.8753703283582, 100, 110.064925987482, 117.892445724817, 129.056530012879, 142.052800661666, 156.784372636979, 178.441948479442, 231.094115427901, 264.375055382559, 288.572774376263, 303.131077181708, 318.936481308266, 363.254813484488]

# Years from 1960 to 2022
years = list(range(1960, 2023))

# Creating DataFrame
cpi_df = pd.DataFrame({'year': years, 'CPI': cpi_values})

cpi_df.to_csv('./inflationIndicatorsUpdates/CPI.csv')


<hr>

## Exchagne Rate

In [35]:
er_usd = pd.read_csv('Er_YoY.csv')
er_usd

Unnamed: 0,Release Date,Time,Actual,Forecast,Previous
0,03/27/2024,47.2817,47.8222,47.9015,47.1933
1,03/26/2024,47.8220,47.4040,47.9500,47.3459
2,03/25/2024,47.3997,46.7872,47.5250,46.5000
3,03/22/2024,46.7000,46.7500,46.7500,46.7500
4,03/21/2024,46.6000,46.9000,47.0900,46.5700
...,...,...,...,...,...
10018,06/06/1990,2.6967,2.6967,2.6967,2.6967
10019,06/05/1990,2.6967,2.6967,2.6967,2.6967
10020,06/04/1990,2.6967,2.6967,2.6967,2.6967
10021,06/01/1990,2.6892,2.6892,2.6892,2.6892


In [36]:
er_usd['Release Date'].dtype

dtype('O')

#### Remove columns

In [37]:
er_usd = er_usd.drop(["Time", "Forecast", "Previous"], axis=1)

In [38]:
# Remove the extra part in the date strings and whitespace
er_usd['Date'] = pd.to_datetime(er_usd['Release Date'], format='%m/%d/%Y', errors='coerce')

## Remove ReleaseDate 
er_usd.drop(['Release Date'], axis=1, inplace= True)
## modify the order of the columns
date_column = er_usd.pop('Date')  
er_usd.insert(er_usd.columns.get_loc('Actual'), 'Date', date_column)

er_usd['EGP'] = 1

er_usd

Unnamed: 0,Date,Actual,EGP
0,2024-03-27,47.8222,1
1,2024-03-26,47.4040,1
2,2024-03-25,46.7872,1
3,2024-03-22,46.7500,1
4,2024-03-21,46.9000,1
...,...,...,...
10018,1990-06-06,2.6967,1
10019,1990-06-05,2.6967,1
10020,1990-06-04,2.6967,1
10021,1990-06-01,2.6892,1


In [39]:
er_usd = er_usd.rename(columns= {'Actual': "USD"})

In [40]:
# Check if there is empty cells
empty_cells_exist = er_usd.isna().any().any()

if empty_cells_exist:
    print("there is empty cells")
else :
    print("There's no empty cells")

There's no empty cells


#### Split Date into year, month, day columns

In [41]:
er_usd['year'] = er_usd['Date'].dt.year
er_usd['month'] = er_usd['Date'].dt.month
er_usd['day'] = er_usd['Date'].dt.day

In [42]:
selected_col = er_usd[['year', 'month', 'day', 'EGP', 'USD']]
# reverse order 
selected_col = selected_col[::-1]
selected_col

Unnamed: 0,year,month,day,EGP,USD
10022,1990,5,31,1,2.6892
10021,1990,6,1,1,2.6892
10020,1990,6,4,1,2.6967
10019,1990,6,5,1,2.6967
10018,1990,6,6,1,2.6967
...,...,...,...,...,...
4,2024,3,21,1,46.9000
3,2024,3,22,1,46.7500
2,2024,3,25,1,46.7872
1,2024,3,26,1,47.4040


#### Save it to csv

In [43]:
selected_col.to_csv('./inflationIndicatorsUpdates/ExchangeRate_usd.csv', index= False)

<hr>

## GDP

In [44]:
gdp = pd.read_csv('gdp_growth_rates.csv')
gdp.head()

Unnamed: 0,Year,GDP Growth Rate
0,1990,5.667029
1,1991,1.125405
2,1992,4.472859
3,1993,2.900791
4,1994,3.973172


In [45]:
gdp = gdp.rename( columns={'GDP Growth Rate': 'gdp_rate', 'Year': 'year'})

In [46]:
gdp.to_csv('./inflationIndicatorsUpdates/GDP.csv', index=0)

In [53]:
selected_col.to_csv('./inflationIndicatorsUpdates/InterestRate.csv', index= False)

## Real InterestRate

In [23]:
import csv

RI_values = [-3.16791456635492, -3.11968024060413, 0.565092117483826, -2.16791456635492, -2.11968024060413, 1.56509211748383, -1.16791456635492, -2.16791456635492, -2.11968024060413, 1.56509211748383, 1.88245857948588, 3.43556713232915, 3.15863419574568, -0.674911055017413, 1.06753529927366, 3.20463879391189, 1.67092489114582, 9.10124138273943, 7.43541940172613, 4.55215763255254, 7.91322048878584, 3.56345275713916, 10.391898920672, 11.9358224011414, 8.91960097651462, 11.2145133742674, 10.3000319442348, 6.32702594810347, 1.52690342883876, 6.52364099171329, 4.88079671789458, -0.0776285947232149, 0.107853154662391, 0.710036320068467, 0.818721618124323, -0.563878053386473, -6.26271950249128, 3.29215715258359, 0.414129139027758, 1.54106846018941, 6.92201135491608, -8.75819408094445, -2.35870301315836, 2.19909578279478, 4.88781543020781, 4.36620669870869, 0.142669652477037]

# Define the range of years from 1960 to 2022
years = list(range(1976, 2023))

# Creating DataFrame
RI_df = pd.DataFrame({'year': years, 'RealInterestRate': RI_values})

RI_df.to_csv('./inflationIndicatorsUpdates/RealInterestRate.csv')


<hr>

## Unemployment Rate

In [30]:
UR = pd.read_csv('./unemployment_rates.csv')
UR.head()

Unnamed: 0,Year,Unemployment Rate
0,1991,9.38
1,1992,8.92
2,1993,10.92
3,1994,10.93
4,1995,11.04


In [31]:
UR = UR.rename( columns={ 'Year': 'year', 'Unemployment Rate': 'UnemploymentRate'} )

In [32]:
UR.to_csv('./inflationIndicatorsUpdates/UnemploymentRate.csv', index= False)

<hr>

## XAU

In [11]:
xau = pd.read_csv('xau.csv')
xau

Unnamed: 0,Release Date,Time,Actual,Forecast,Previous
0,04/19/2024,116242.78,115015.51,116852.05,114727.94
1,04/18/2024,115020.34,114847.77,115693.97,114670.74
2,04/17/2024,114302.76,115568.08,116944.59,114210.01
3,04/16/2024,115426.80,114982.59,116319.74,114079.91
4,04/15/2024,114811.92,111520.89,115201.47,111514.56
...,...,...,...,...,...
1613,02/09/2018,23284.88,23316.30,23397.84,23192.22
1614,02/08/2018,23327.15,23296.90,23388.22,23058.78
1615,02/07/2018,23310.12,23371.58,23596.19,23189.53
1616,02/06/2018,23378.51,23661.17,23842.99,23304.58


In [12]:
xau = xau.drop(["Time", "Forecast", "Previous"], axis=1)
xau

Unnamed: 0,Release Date,Actual
0,04/19/2024,115015.51
1,04/18/2024,114847.77
2,04/17/2024,115568.08
3,04/16/2024,114982.59
4,04/15/2024,111520.89
...,...,...
1613,02/09/2018,23316.30
1614,02/08/2018,23296.90
1615,02/07/2018,23371.58
1616,02/06/2018,23661.17


In [13]:
xau['Date'] = pd.to_datetime(xau['Release Date'], format='%m/%d/%Y', errors='coerce')
xau['year'] = xau['Date'].dt.year
xau['month'] = xau['Date'].dt.month
xau['day'] = xau['Date'].dt.day

In [16]:
xau = xau.rename( columns={ "Actual": "XAU_Price"})
xau

Unnamed: 0,Release Date,XAU_Price,Date,year,month,day
0,04/19/2024,115015.51,2024-04-19,2024,4,19
1,04/18/2024,114847.77,2024-04-18,2024,4,18
2,04/17/2024,115568.08,2024-04-17,2024,4,17
3,04/16/2024,114982.59,2024-04-16,2024,4,16
4,04/15/2024,111520.89,2024-04-15,2024,4,15
...,...,...,...,...,...,...
1613,02/09/2018,23316.30,2018-02-09,2018,2,9
1614,02/08/2018,23296.90,2018-02-08,2018,2,8
1615,02/07/2018,23371.58,2018-02-07,2018,2,7
1616,02/06/2018,23661.17,2018-02-06,2018,2,6


In [23]:
selected_col = xau[['year', 'month', 'day', 'XAU_Price']]
# reverse order 
selected_col = selected_col[::-1]
selected_col


Unnamed: 0,year,month,day,XAU_Price
1617,2018,2,5,23549.41
1616,2018,2,6,23661.17
1615,2018,2,7,23371.58
1614,2018,2,8,23296.90
1613,2018,2,9,23316.30
...,...,...,...,...
4,2024,4,15,111520.89
3,2024,4,16,114982.59
2,2024,4,17,115568.08
1,2024,4,18,114847.77


In [24]:
selected_col.to_csv('./inflationIndicatorsUpdates/XAU2.csv')

In [33]:
xau = pd.read_csv('./inflationIndicatorsUpdates/XAU.csv')
xau2 = pd.read_csv('./inflationIndicatorsUpdates/XAU2.csv')

merged_df = pd.concat([xau, xau2], ignore_index=True)

merged_df.drop_duplicates(subset=['year', 'month', 'day', 'XAU_Price'], inplace=True)

# Reset the index after dropping duplicates
merged_df.reset_index(drop=True, inplace=True)


merged_df.to_csv('./inflationIndicatorsUpdates/XAU_me.csv')

## EUR / USD

In [21]:
eu = pd.read_csv('./EU.csv')

In [22]:
eu = eu.drop(["Open", "High", "Low"], axis=1)

In [23]:
eu['Date'] = pd.to_datetime(eu['Date'], format='%m/%d/%Y', errors='coerce')
eu['year'] = eu['Date'].dt.year
eu['month'] = eu['Date'].dt.month
eu['day'] = eu['Date'].dt.day

In [24]:
selected_col = eu[['year', 'month', 'day', 'Price']]
# reverse order 
selected_col = selected_col[::-1]
selected_col


Unnamed: 0,year,month,day,Price
1844,1990,3,20,1.1942
1843,1990,3,21,1.1902
1842,1990,3,22,1.1968
1841,1990,3,23,1.1915
1840,1990,3,26,1.1964
...,...,...,...,...
4,1997,4,14,1.1310
3,1997,4,15,1.1313
2,1997,4,16,1.1300
1,1997,4,17,1.1337


In [25]:
eu = eu.rename( columns={ "Price": "EUR/USD"})
eu = eu[['year', 'month', 'day', 'EUR/USD']]

In [26]:
eu.to_csv('./inflationIndicatorsUpdates/EUR_USD.csv', index= False)

## Collecte Datasets in one Dataset

In [39]:
cpi = pd.read_csv('./inflationIndicatorsUpdates/cpi_data.csv')
IR = pd.read_csv('./inflationIndicatorsUpdates/InterestRate.csv')
gdp = pd.read_csv('./inflationIndicatorsUpdates/GDP.csv')
er_usd = pd.read_csv('./inflationIndicatorsUpdates/ExchangeRate_usd.csv')
er_eur = pd.read_csv('./inflationIndicatorsUpdates/ExchangeRate_eur.csv')
unem = pd.read_csv('./inflationIndicatorsUpdates/UnemploymentRate.csv')
xau = pd.read_csv('./inflationIndicatorsUpdates/XAU.csv')

In [40]:
print("CPI: "+ str(len(cpi)))
print("IR: "+ str(len(IR)))
print("gdp: "+ str(len(gdp)))
print("er_usd: "+ str(len(er_usd)))
print("er_eur: "+ str(len(er_eur)))
print("unem: "+ str(len(unem)))
print("xau: "+ str(len(xau)))

CPI: 406
IR: 119
gdp: 34
er_usd: 9999
er_eur: 8016
unem: 33
xau: 174


In [56]:
import pandas as pd

# Read datasets
cpi = pd.read_csv('./inflationIndicatorsUpdates/cpi_data.csv')
IR = pd.read_csv('./inflationIndicatorsUpdates/InterestRate.csv')
gdp = pd.read_csv('./inflationIndicatorsUpdates/GDP.csv')
er_usd = pd.read_csv('./inflationIndicatorsUpdates/ExchangeRate_usd.csv')
er_eur = pd.read_csv('./inflationIndicatorsUpdates/ExchangeRate_eur.csv')
unem = pd.read_csv('./inflationIndicatorsUpdates/UnemploymentRate.csv')
xau = pd.read_csv('./inflationIndicatorsUpdates/XAU.csv')

# Convert 'year', 'month', and 'day' columns to integers
er_usd['year'] = er_usd['year'].astype(int)
er_usd['month'] = er_usd['month'].astype(int)
er_usd['day'] = er_usd['day'].astype(int)

er_eur['year'] = er_eur['year'].astype(int)
er_eur['month'] = er_eur['month'].astype(int)
er_eur['day'] = er_eur['day'].astype(int)

cpi['year'] = cpi['year'].astype(int)
cpi['month'] = cpi['month'].astype(int)
cpi['day'] = cpi['day'].astype(int)

xau['year'] = xau['year'].astype(int)
xau['month'] = xau['month'].astype(int)
xau['day'] = xau['day'].astype(int)

IR['year'] = IR['year'].astype(int)
IR['month'] = IR['month'].astype(int)
IR['day'] = IR['day'].astype(int)

gdp['year'] = gdp['year'].astype(int)

unem['year'] = unem['year'].astype(int)

# Concatenate the datasets along the 'Year' column
InflationIndicator_df = (
    er_usd.merge(er_eur, on=['year', 'month', 'day'], how='outer', suffixes=('_er_usd', 'er_eur'))
    .merge(cpi, on=['year', 'month', 'day'], how='outer', suffixes=('_er_eur', '_cpi'))
    .merge(xau, on=['year', 'month', 'day'], how='outer', suffixes=('_cpi', '_xau'))
    .merge(IR, on=['year', 'month', 'day'], how='outer', suffixes=('_xau', '_IR'))
    .merge(gdp, on=['year'], how='outer', suffixes=('_IR', '_gdp'))
    .merge(unem, on=['year'], how='outer', suffixes=('_gdp', '_unem'))
)

# Optionally, sort the combined DataFrame by the 'Year' column
InflationIndicator_df.sort_values(['year'], inplace=True)

# Display the first few rows of the combined DataFrame
InflationIndicator_df.head()


Unnamed: 0.1,year,month,day,EGP_er_usd,USD,EGPer_eur,EUR,cpi_rate,Unnamed: 0,XAU_Price,InterestRate,gdp_rate,UnemploymentRate
0,1990,5,31,1.0,2.6892,,,,,,,5.667029,
87,1990,10,2,1.0,2.7567,,,,,,,5.667029,
86,1990,10,1,1.0,2.7567,,,17.96%,,,,5.667029,
85,1990,9,28,1.0,2.7467,,,,,,,5.667029,
84,1990,9,27,1.0,2.7467,,,,,,,5.667029,


0       1
1       2
2       3
3       4
4       5
       ..
401    11
402    12
403     1
404     2
405     3
Name: month, Length: 406, dtype: int64