<a href="https://colab.research.google.com/github/jada-ke/CodeJam14/blob/main/gdpData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Mount Google Drive**

In [10]:
# Mount Google Drive at /content/drive; the CSV paths used later in this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# GDP Data Collection <br>
**Source**: OECD <br>
**Dates:** January 01, 2016 to November 22, 2024 <br>
- Annual Data 2016 to 2023 <br>
- Quarterly Data 2024 <br>
   - **USA:** Q1, Q2, Q3 <br>
   - **Canada:** Q1

In [8]:
pip install requests pandas




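**Sources:** annual_gdp.csv, quarterly_gdp.csv

1. Filter both files to the USA and Canada rows and keep only the country, time-period, and GDP value columns. <br>
2. Sum the 2024 quarterly values per country and append them to the annual series to produce combined_gdp.csv.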

In [13]:
import pandas as pd

# Folder on the mounted Drive that holds the input CSVs and receives the output.
DATA_DIR = '/content/drive/MyDrive/Ai4Ducks/hack/CodeJam/Data/Collection'

# Reference areas kept from both files.
COUNTRIES = ['USA', 'CAN']

# Source column -> output column; the keys are also the only columns kept.
GDP_COLUMNS = {'REF_AREA': 'Country', 'TIME_PERIOD': 'Year', 'OBS_VALUE': 'GDP'}


def _to_gdp_frame(df):
    """Return a Country / Year / GDP frame built from a raw GDP export.

    Keeps only the REF_AREA, TIME_PERIOD and OBS_VALUE columns, renames them,
    and reduces the period to its first four characters converted to a
    nullable integer (e.g. 'YYYYQ1' -> YYYY). Applying the same conversion to
    the annual and the quarterly file gives 'Year' one dtype in the combined
    frame instead of a mix of integers and strings, so sorting and later
    comparisons on 'Year' are well defined. Periods that cannot be parsed
    become <NA> rather than raising.
    """
    gdp = df[list(GDP_COLUMNS)].rename(columns=GDP_COLUMNS)
    year = pd.to_numeric(gdp['Year'].astype(str).str[:4], errors='coerce')
    return gdp.assign(Year=year.astype('Int64'))


# Load the annual GDP data (2016-2023) for US and Canada
annual_file_path = f'{DATA_DIR}/annual_gdp.csv'
annual_df = pd.read_csv(annual_file_path)

# Load the quarterly GDP data for US and Canada (2024 Q1, Q2, Q3)
quarterly_file_path = f'{DATA_DIR}/quarterly_gdp.csv'
quarterly_df = pd.read_csv(quarterly_file_path)

# Inspect the columns of both raw files
print(annual_df.columns)
print(quarterly_df.columns)

# Keep only the USA and Canada rows in both files
annual_df = annual_df[annual_df['REF_AREA'].isin(COUNTRIES)]
quarterly_df = quarterly_df[quarterly_df['REF_AREA'].isin(COUNTRIES)]

# NOTE(review): no filter is applied on the other dimension columns of the
# export (e.g. TRANSACTION, UNIT_MEASURE, PRICE_BASE). This assumes each file
# holds a single series per country - confirm against the downloaded data.
annual_gdp_df = _to_gdp_frame(annual_df)

# Collapse the quarterly rows to one row per country and year by summing.
# NOTE(review): the sum covers only the quarters present in the file, so the
# resulting figure may not be directly comparable with the full-year annual
# values - confirm that a partial-year sum is what is intended.
quarterly_gdp_df = (
    _to_gdp_frame(quarterly_df)
    .groupby(['Country', 'Year'])
    .agg({'GDP': 'sum'})
    .reset_index()
)

# Stack the annual rows and the aggregated quarterly rows, then order them
# by country and year ('Year' is an integer column in both inputs).
combined_df = pd.concat([annual_gdp_df, quarterly_gdp_df], ignore_index=True)
combined_df = combined_df.sort_values(by=['Country', 'Year'])

# Check the combined DataFrame
print(combined_df.head())

# Save the combined data to a new CSV file
combined_df.to_csv(f'{DATA_DIR}/combined_gdp.csv', index=False)


Index(['STRUCTURE', 'STRUCTURE_ID', 'STRUCTURE_NAME', 'ACTION', 'FREQ',
       'Frequency of observation', 'ADJUSTMENT', 'Adjustment', 'REF_AREA',
       'Reference area', 'SECTOR', 'Institutional sector',
       'COUNTERPART_SECTOR', 'Counterpart institutional sector', 'TRANSACTION',
       'Transaction', 'INSTR_ASSET',
       'Financial instruments and non-financial assets', 'ACTIVITY',
       'Economic activity', 'EXPENDITURE', 'Expenditure', 'UNIT_MEASURE',
       'Unit of measure', 'PRICE_BASE', 'Price base', 'TRANSFORMATION',
       'Transformation', 'TABLE_IDENTIFIER', 'Table identifier', 'TIME_PERIOD',
       'Time period', 'OBS_VALUE', 'Observation value', 'REF_YEAR_PRICE',
       'Price reference year', 'BASE_PER', 'Base period', 'CONF_STATUS',
       'Confidentiality status', 'DECIMALS', 'Decimals', 'OBS_STATUS',
       'Observation status', 'UNIT_MULT', 'Unit multiplier', 'CURRENCY',
       'Currency'],
      dtype='object')
Index(['STRUCTURE', 'STRUCTURE_ID', 'STRUCTURE_NA