In [None]:
import pandas as pd

# Load the GDP CSV export. The first four lines of the file are metadata
# and are skipped so the real header row is used for the column names;
# 'utf-8-sig' strips a leading byte-order mark if the file has one.
gdp_data = pd.read_csv('D:/week10 data/gdp_data.csv', skiprows=4, encoding='utf-8-sig')

# Inspect the columns and the first few rows
print(gdp_data.columns)
print(gdp_data.head())

# Identifier columns that are kept as-is through the reshape.
id_columns = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code']

# Keep only the identifier columns plus the years 1987..2021 (range() excludes
# its upper bound). Every other column in the file is dropped here, including
# earlier/later years and the trailing 'Unnamed: ...' column.
year_columns = [str(year) for year in range(1987, 2022)]
gdp_data = gdp_data[id_columns + year_columns]

# Reshape from wide (one column per year) to long (one row per
# country/indicator/year), with the value stored in 'GDP'.
gdp_data = gdp_data.melt(id_vars=id_columns, var_name='Year', value_name='GDP')

# Convert 'Year' to datetime and rename to 'Date'. With format='%Y' each year
# is parsed to January 1st of that year; unparseable values become NaT.
gdp_data['Year'] = pd.to_datetime(gdp_data['Year'], format='%Y', errors='coerce')
gdp_data.rename(columns={'Year': 'Date'}, inplace=True)

# Drop rows with NaT in 'Date' column (caused by non-year values)
gdp_data.dropna(subset=['Date'], inplace=True)

# Print the cleaned and reshaped dataframe
print(gdp_data.head())

# Filter for the countries of interest.
# NOTE: the names must be passed as a list to .isin(). Writing the string
# literals next to each other without commas makes Python concatenate them
# into one long string, which matches no country and yields an empty frame.
selected_countries = ['United States', 'United Arab Emirates', 'Saudi Arabia']
country_gdp_data = gdp_data[gdp_data['Country Name'].isin(selected_countries)]

# Print the filtered data
print(country_gdp_data.head())


Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',
       '2023', 'Unnamed: 68'],
      dtype='object')
                  Country Name Country Code     Indicator Name  \
0                        Aruba          ABW  GDP (current US$)   
1  Africa Eastern and Southern          AFE  GDP (current US$)   
2                  Afghanistan          AFG  GDP (current US$)   
3   Africa Western and Central          AFW  GDP (curre

In [9]:
# Persist the long-format GDP table to disk; index=False keeps the
# DataFrame's row index out of the written CSV.
gdp_data.to_csv(
    path_or_buf='D:/week10 data/cleaned_gdp_data.csv',
    index=False,
)
