# 3: Explore Other Potential Factors Influencing Oil Prices:


 Load and explore the GDP growth rate data, then proceed with the inflation rate and exchange rate data.








# Load GDP Growth Rates


In [2]:
import pandas as pd
# The header row of this CSV export sits below four leading lines, so skip them.
gdp_growth = pd.read_csv(
    '../data/external/gdp_growth.csv',
    skiprows=4,
)
# Preview the first five rows to check the column layout.
print(gdp_growth.head(5))

                  Country Name Country Code         Indicator Name  \
0                        Aruba          ABW  GDP growth (annual %)   
1  Africa Eastern and Southern          AFE  GDP growth (annual %)   
2                  Afghanistan          AFG  GDP growth (annual %)   
3   Africa Western and Central          AFW  GDP growth (annual %)   
4                       Angola          AGO  GDP growth (annual %)   

      Indicator Code  1960      1961      1962      1963      1964      1965  \
0  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
1  NY.GDP.MKTP.KD.ZG   NaN  0.460106  7.868013  5.616400  4.668135  5.138990   
2  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
3  NY.GDP.MKTP.KD.ZG   NaN  1.873455  3.707643  7.145784  5.406403  4.102491   
4  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   

   ...      2015      2016      2017      2018      2019       2020  \
0  ... -0.623626  1.719625 

# Load Inflation Rates


In [3]:
import pandas as pd
# The header row of this CSV export sits below four leading lines, so skip them.
inflation = pd.read_csv(
    '../data/external/inflation.csv',
    skiprows=4,
)
# Preview the first five rows to check the column layout.
print(inflation.head(5))

                  Country Name Country Code  \
0                        Aruba          ABW   
1  Africa Eastern and Southern          AFE   
2                  Afghanistan          AFG   
3   Africa Western and Central          AFW   
4                       Angola          AGO   

                          Indicator Name  Indicator Code  1960  1961  1962  \
0  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   
1  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   
2  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   
3  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   
4  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   

   1963  1964  1965  ...      2015       2016       2017       2018  \
0   NaN   NaN   NaN  ...  0.474764  -0.931196  -1.028282   3.626041   
1   NaN   NaN   NaN  ...  5.245878   6.571396   6.399343   4.720805   
2   NaN   NaN   NaN  ... -0.661709 

# Load Exchange Rates

In [6]:
import pandas as pd
# The header row of this CSV export sits below four leading lines, so skip them.
exchange_rate = pd.read_csv(
    '../data/external/exchangerates.csv',
    skiprows=4,
)
# Preview the first five rows to check the column layout.
print(exchange_rate.head(5))

                  Country Name Country Code  \
0                        Aruba          ABW   
1  Africa Eastern and Southern          AFE   
2                  Afghanistan          AFG   
3   Africa Western and Central          AFW   
4                       Angola          AGO   

                                      Indicator Name Indicator Code  \
0  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF   
1  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF   
2  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF   
3  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF   
4  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF   

           1960          1961          1962          1963          1964  \
0           NaN           NaN           NaN           NaN           NaN   
1           NaN           NaN           NaN           NaN           NaN   
2  1.719656e+01  1.719656e+01  1.719656e+01  3.510964e+01  3.8692

# Data preprocessing


In [8]:
import pandas as pd

def preprocess_economic_data(file_path, country, data_type):
    """Reshape a wide country-by-year indicator CSV into a year-indexed table.

    The file is read with its first four lines skipped and must contain the
    columns 'Country Name', 'Country Code', 'Indicator Name' and
    'Indicator Code', followed by one column per year.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    country : str
        Exact 'Country Name' value to keep, or 'All' to keep every row.
    data_type : str
        Label used only in the preview printed before returning.

    Returns
    -------
    pandas.DataFrame
        Indexed by integer 'Year' with one column per country name. Years in
        which every selected country is NaN are removed.

    Raises
    ------
    KeyError
        If one of the expected identifier columns is missing.
    ValueError
        If the same country name occurs on more than one row (from ``pivot``).
    """
    raw = pd.read_csv(file_path, skiprows=4)

    # 'All' is a sentinel meaning "do not filter by country".
    if country != 'All':
        raw = raw[raw['Country Name'] == country]

    # Build new frames instead of mutating in place: an in-place drop on the
    # filtered slice above is the pattern that raises SettingWithCopyWarning.
    wide = raw.drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'])

    # Discard auto-named columns such as "Unnamed: 68".
    wide = wide.loc[:, ~wide.columns.str.contains('^Unnamed')]

    # Long format: one row per (country, year) pair.
    tidy = wide.melt(id_vars=['Country Name'], var_name='Year', value_name='Value')

    # Non-numeric column labels become NaN and are dropped; the cast keeps the
    # year an integer even when such a label forced the column to float.
    tidy = (
        tidy.assign(Year=pd.to_numeric(tidy['Year'], errors='coerce'))
        .dropna(subset=['Year'])
        .astype({'Year': 'int64'})
    )

    # Years become the index, countries the columns.
    by_year = tidy.pivot(index='Year', columns='Country Name', values='Value')

    # axis=0 removes rows, i.e. years for which no country has a value.
    by_year = by_year.dropna(axis=0, how='all')

    # Debug preview of the cleaned table.
    print(f"DataFrame after preprocessing ({data_type}):")
    print(by_year.head())

    return by_year

# Input CSVs for the three indicators (relative to the notebook directory)
gdp_growth_path = '../data/external/gdp_growth.csv'
inflation_path = '../data/external/inflation.csv'
exchange_rate_path = '../data/external/exchangerates.csv'

# One (path, country filter, label) tuple per indicator; 'All' keeps every country
datasets = [
    (gdp_growth_path, 'All', 'GDP Growth Rates'),
    (inflation_path, 'All', 'Inflation Rates'),
    (exchange_rate_path, 'All', 'Exchange Rates'),
]

# Clean every indicator and keep the result under its label
cleaned_dataframes = {}
for path, country, data_type in datasets:
    cleaned_dataframes[data_type] = preprocess_economic_data(path, country, data_type)

# Show the first rows of each cleaned table
for data_type in cleaned_dataframes:
    df = cleaned_dataframes[data_type]
    print(f"\n{data_type} (All Country):", df.head(), sep="\n")


DataFrame after preprocessing (GDP Growth Rates):
Country Name  Afghanistan  Africa Eastern and Southern  \
Year                                                     
1961                  NaN                     0.460106   
1962                  NaN                     7.868013   
1963                  NaN                     5.616400   
1964                  NaN                     4.668135   
1965                  NaN                     5.138990   

Country Name  Africa Western and Central  Albania    Algeria  American Samoa  \
Year                                                                           
1961                            1.873455      NaN -13.605441             NaN   
1962                            3.707643      NaN -19.685042             NaN   
1963                            7.145784      NaN  34.313729             NaN   
1964                            5.406403      NaN   5.839413             NaN   
1965                            4.102491      NaN   6.206898   

# Merge Economic Indicators with Brent Oil Prices


In [None]:
import pandas as pd

# Function to preprocess the economic data
def preprocess_economic_data(file_path, country, data_type):
    """Turn a wide country-by-year indicator CSV into a year-indexed table.

    The file is read with its first four lines skipped and must contain the
    columns 'Country Name', 'Country Code', 'Indicator Name' and
    'Indicator Code', followed by one column per year.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    country : str
        Exact 'Country Name' value to keep, or 'All' to keep every row.
    data_type : str
        Label used only in the preview printed before returning.

    Returns
    -------
    pandas.DataFrame
        Indexed by integer 'Year' with one column per country name. Years in
        which every selected country is NaN are removed.

    Raises
    ------
    KeyError
        If one of the expected identifier columns is missing.
    ValueError
        If the same country name occurs on more than one row (from ``pivot``).
    """
    # Load the dataset
    indicators = pd.read_csv(file_path, skiprows=4)

    # 'All' means "keep every country". Comparing 'Country Name' with the
    # literal string 'All' would match no row and yield an empty table.
    if country != 'All':
        indicators = indicators[indicators['Country Name'] == country]

    # Drop identifier columns that are not needed downstream
    indicators = indicators.drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'])

    # Remove auto-named columns (e.g., "Unnamed: 68")
    indicators = indicators.loc[:, ~indicators.columns.str.contains('^Unnamed')]

    # Melt to long format: one row per (country, year) pair
    tidy = indicators.melt(id_vars=['Country Name'], var_name='Year', value_name='Value')

    # Non-numeric column labels become NaN and are dropped; the cast keeps the
    # year an integer even when such a label forced the column to float.
    tidy = (
        tidy.assign(Year=pd.to_numeric(tidy['Year'], errors='coerce'))
        .dropna(subset=['Year'])
        .astype({'Year': 'int64'})
    )

    # Pivot so that years are the index and countries are the columns
    by_year = tidy.pivot(index='Year', columns='Country Name', values='Value')

    # axis=0 removes rows, i.e. years for which no country has a value
    by_year = by_year.dropna(axis=0, how='all')

    # Debug print to identify the data type and show the DataFrame
    print(f"DataFrame after preprocessing ({data_type}):")
    print(by_year.head())

    return by_year

# Function to preprocess Brent Oil Prices
def preprocess_brent_oil_prices(file_path):
    """Read the Brent price CSV and replace its 'Date' column with 'Year'.

    Parameters
    ----------
    file_path : str or path-like
        Location of a CSV file that contains a 'Date' column.

    Returns
    -------
    pandas.DataFrame
        The file's rows without 'Date', plus a 'Year' column holding the
        calendar year of each parsed date. Rows are not aggregated.
    """
    prices = pd.read_csv(file_path)

    # Parse the dates once; only their calendar year is kept.
    parsed_dates = pd.to_datetime(prices['Date'])
    tagged = prices.drop(columns=['Date']).assign(Year=parsed_dates.dt.year)

    # Debug preview of the result.
    print("DataFrame after preprocessing (Brent Oil Prices):")
    print(tagged.head())

    return tagged

# File paths
# NOTE(review): gdp_growth, inflation and exchange_rate are also the names of the
# DataFrames created in the earlier loading cells; after this cell runs they hold
# path strings instead. The names are kept unchanged for compatibility.
gdp_growth = '../data/external/gdp_growth.csv'
inflation = '../data/external/inflation.csv'
exchange_rate = '../data/external/exchangerates.csv'
brent_oil_prices_path = '../data/raw/BrentOilPrices.csv'

# Preprocess each indicator for all countries
country = 'All'
gdp_growth_clean = preprocess_economic_data(gdp_growth, country, 'GDP Growth Rates')
inflation_rates_clean = preprocess_economic_data(inflation, country, 'Inflation Rates')
exchange_rates_clean = preprocess_economic_data(exchange_rate, country, 'Exchange Rates')
# Alias kept for compatibility: this exchange-rate table used to be named "interest rates".
interest_rates_clean = exchange_rates_clean

# Preprocess Brent Oil Prices (one row per price observation, tagged with its year)
brent_oil_prices_clean = preprocess_brent_oil_prices(brent_oil_prices_path)

# The indicators have one row per year, so reduce the prices to one mean row per
# year before merging. Merging the un-aggregated prices on 'Year' would repeat
# every indicator row once per price observation in that year.
brent_oil_prices_annual = (
    brent_oil_prices_clean
    .groupby('Year', as_index=False)
    .mean(numeric_only=True)
)

# Merge datasets on the year
merged_data_gdp = pd.merge(gdp_growth_clean.reset_index(), brent_oil_prices_annual, on='Year', how='outer')
merged_data_inflation = pd.merge(inflation_rates_clean.reset_index(), brent_oil_prices_annual, on='Year', how='outer')
merged_data_exchange = pd.merge(exchange_rates_clean.reset_index(), brent_oil_prices_annual, on='Year', how='outer')
# Alias kept for compatibility with the previous variable name.
merged_data_interest = merged_data_exchange

# Display the merged data
print("Merged GDP Growth Rates with Brent Oil Prices:")
print(merged_data_gdp.head())

print("\nMerged Inflation Rates with Brent Oil Prices:")
print(merged_data_inflation.head())

print("\nMerged Exchange Rates with Brent Oil Prices:")
print(merged_data_exchange.head())
