# Extract GDP data using the BEA API and save it as a CSV file after cleaning.

In [1]:
import os
import xml.etree.ElementTree as ET

import pandas as pd
import requests

# Fetch NIPA table T10102 (annual + quarterly, all years) from the BEA API and
# load every <Data> element of the XML response into the DataFrame `df`.
#
# The API key is read from the BEA_API_KEY environment variable instead of being
# embedded in the notebook, so it is not leaked when the notebook is shared.
# NOTE(review): a key that was previously committed in this cell should be
# treated as exposed and rotated.
api_key = os.environ.get("BEA_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the BEA_API_KEY environment variable to your 36-character BEA API key"
    )

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever and freeze the kernel.
REQUEST_TIMEOUT_SECONDS = 30

api_url = (
    "https://apps.bea.gov/api/data/"
    f"?&UserID={api_key}"
    "&method=GetData&DataSetName=NIPA&TableName=T10102"
    "&Frequency=A,Q&Year=ALL&ResultFormat=xml"
)

# Send the request to the BEA API
response = requests.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS)

# Stop here on an HTTP failure: later cells need `df`, and continuing without it
# would only surface as a confusing NameError further down.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data, status code: {response.status_code}")

# Parse the XML response
root = ET.fromstring(response.content)

# Each <Data> element carries one observation as XML attributes; keep them as dicts
data_rows = [item.attrib for item in root.findall('./Results/Data')]

# presumably the API reports bad keys/parameters inside a 200 response body —
# TODO confirm; either way, zero rows means there is nothing to analyse.
if not data_rows:
    raise RuntimeError(
        "BEA response contained no ./Results/Data elements; "
        "check the API key and request parameters"
    )

# Convert the list of dictionaries to a pandas DataFrame (all columns are strings)
df = pd.DataFrame(data_rows)

# Display the first few rows of the DataFrame to verify
print(df.head())


  NoteRef DataValue UNIT_MULT            METRIC_NAME  \
0  T10102      -8.5         0  Fisher Quantity Index   
1  T10102      -6.4         0  Fisher Quantity Index   
2  T10102     -12.9         0  Fisher Quantity Index   
3  T10102      -1.2         0  Fisher Quantity Index   
4  T10102      10.8         0  Fisher Quantity Index   

                       CL_UNIT TimePeriod         LineDescription LineNumber  \
0  Percent change, annual rate       1930  Gross domestic product          1   
1  Percent change, annual rate       1931  Gross domestic product          1   
2  Percent change, annual rate       1932  Gross domestic product          1   
3  Percent change, annual rate       1933  Gross domestic product          1   
4  Percent change, annual rate       1934  Gross domestic product          1   

  SeriesCode TableName  
0     A191RL    T10102  
1     A191RL    T10102  
2     A191RL    T10102  
3     A191RL    T10102  
4     A191RL    T10102  


In [2]:
# Render the raw frame; pandas elides the middle rows, so only the head and tail appear
df

Unnamed: 0,NoteRef,DataValue,UNIT_MULT,METRIC_NAME,CL_UNIT,TimePeriod,LineDescription,LineNumber,SeriesCode,TableName
0,T10102,-8.5,0,Fisher Quantity Index,"Percent change, annual rate",1930,Gross domestic product,1,A191RL,T10102
1,T10102,-6.4,0,Fisher Quantity Index,"Percent change, annual rate",1931,Gross domestic product,1,A191RL,T10102
2,T10102,-12.9,0,Fisher Quantity Index,"Percent change, annual rate",1932,Gross domestic product,1,A191RL,T10102
3,T10102,-1.2,0,Fisher Quantity Index,"Percent change, annual rate",1933,Gross domestic product,1,A191RL,T10102
4,T10102,10.8,0,Fisher Quantity Index,"Percent change, annual rate",1934,Gross domestic product,1,A191RL,T10102
...,...,...,...,...,...,...,...,...,...,...
10421,T10102,0.43,0,Quantity Contributions,Level,2023,State and local,26,A829RY,T10102
10422,T10102,0.49,0,Quantity Contributions,Level,2023Q1,State and local,26,A829RY,T10102
10423,T10102,0.50,0,Quantity Contributions,Level,2023Q2,State and local,26,A829RY,T10102
10424,T10102,0.53,0,Quantity Contributions,Level,2023Q3,State and local,26,A829RY,T10102


In [3]:
# Reduce the raw BEA frame `df` to the annual "Gross domestic product" rows after
# a cut-off year, producing `df_csv` with columns Year, GDP and LineDescription.
# Intermediate frames `year_df` and `filtered_df` are kept for inspection.
#
# pandas is already imported in the first cell, so it is not re-imported here.

# Only years strictly greater than this are kept
MIN_YEAR_EXCLUSIVE = 2000
# Value of 'LineDescription' identifying the series of interest
TARGET_LINE = 'Gross domestic product'

# Work on a derived Series rather than overwriting df['TimePeriod'], so the raw
# frame stays untouched and this cell gives the same result when re-run.
time_period = df['TimePeriod'].astype(str)

# TimePeriod mixes annual ("1930") and quarterly ("2023Q1") labels; annual rows
# are exactly four digits. Matching digits (not just length 4) guarantees the
# int conversion below cannot fail on an unexpected four-character label.
is_year = time_period.str.fullmatch(r'\d{4}')

# Copy so the new columns are added to an independent frame (no SettingWithCopyWarning)
year_df = df[is_year].copy()
year_df['Year'] = year_df['TimePeriod'].astype(int)

# DataValue arrives as text; strip thousands separators (e.g. "1,234.5") before
# converting, otherwise astype(float) raises on such values.
# NOTE(review): in the displayed output the GDP rows have CL_UNIT
# "Percent change, annual rate", so 'GDP' here looks like a growth rate rather
# than a level — confirm the column name is intended.
year_df['GDP'] = (
    year_df['DataValue']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Keep only the target series for years after the cut-off
filtered_df = year_df[
    (year_df['Year'] > MIN_YEAR_EXCLUSIVE)
    & (year_df['LineDescription'] == TARGET_LINE)
]

# Select the columns that go into the CSV
df_csv = filtered_df[['Year', 'GDP', 'LineDescription']]

# An empty result would silently produce a header-only CSV in the next cell
if df_csv.empty:
    raise ValueError(
        f"No annual '{TARGET_LINE}' rows found after {MIN_YEAR_EXCLUSIVE}"
    )

In [4]:
# Write the cleaned GDP table to disk next to the notebook, omitting the row index
df_csv.to_csv(path_or_buf='./gdp_data.csv', index=False)