In [19]:
import pandas as pd
# Time series data - 2020 with merged covid, gdp, democracy scores

# Load the datasets
covid_data = pd.read_csv("COVID-19 Time Series Data.csv")
gdp_data = pd.read_csv("gdp_per_capita.csv", delimiter=',', skiprows=3, on_bad_lines='warn', keep_default_na=True)
democracy_data = pd.read_csv("DemocracyMatrix_v4.csv")

# Convert the 'Date' column in the COVID dataset to datetime format
covid_data['Date'] = pd.to_datetime(covid_data['Date'])

# Filter the COVID data for the date range from January 22, 2020, to January 22, 2022
start_date = '2020-01-22'
end_date = '2022-01-22'
covid_filtered = covid_data[(covid_data['Date'] >= start_date) & (covid_data['Date'] <= end_date)]

# Create 'Year' and 'Month' columns from the 'Date' column
covid_filtered['Year'] = covid_filtered['Date'].dt.year
covid_filtered['Month'] = covid_filtered['Date'].dt.month

# Drop the 'Province/State' column and unnecessary columns
covid_filtered = covid_filtered.drop(columns=['Province/State', 'Confirmed', 'Recovered'])

# Rename 'Country/Region' column
covid_filtered = covid_filtered.rename(columns={'Country/Region': 'Country'})

# Melt the GDP data to have a 'Year' column
gdp_melted = gdp_data.melt(id_vars='Country Name', value_vars=['2020', '2021', '2022'],
                           var_name='Year', value_name='GDP_per_Capita')

# Convert the 'Year' column to numeric and rename 'Country Name' column
gdp_melted['Year'] = pd.to_numeric(gdp_melted['Year'])
gdp_melted = gdp_melted.rename(columns={'Country Name': 'Country'})

# Filter GDP data for the years 2020 to 2022
gdp_filtered = gdp_melted[gdp_melted['Year'].isin([2020, 2021, 2022])]

# Rename columns in the democracy dataset
democracy_data = democracy_data.rename(columns={'country': 'Country', 'year': 'Year'})

# Filter democracy data for the years 2020 to 2022
democracy_filtered = democracy_data[democracy_data['Year'].isin([2020, 2021, 2022])]

# Merge the COVID dataset with the GDP dataset on 'Country' and 'Year'
merged_data = pd.merge(covid_filtered, gdp_filtered, how='inner', on=['Country', 'Year'])

# Merge the above result with democracy data on 'Country' and 'Year'
final_merged_data = pd.merge(merged_data, democracy_filtered, how='inner', on=['Country', 'Year'])

# Group by 'Country', 'Year', and 'Date' to aggregate the data
grouped_data = final_merged_data.groupby(['Country', 'Year', 'Date']).agg({
    'Deaths': 'sum',  # Sum deaths for each country and date
    'GDP_per_Capita': 'mean',  # Average GDP per capita
    'total_index_context': 'mean',  # Average democracy scores
    'classification_context': 'first', # Get the first classification context
}).reset_index()

# Rename columns for clarity
grouped_data = grouped_data.rename(columns={
    'total_index_context': 'Democracy_Scores',
    'classification_context': 'Democracy_Classification'
})

# Export the final merged DataFrame to a CSV file
grouped_data.to_csv('time_series_data.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_filtered['Year'] = covid_filtered['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  covid_filtered['Month'] = covid_filtered['Date'].dt.month


           Country  Year       Date  Deaths  GDP_per_Capita  Democracy_Scores  \
0      Afghanistan  2020 2020-01-22       0      512.055098          0.357568   
1      Afghanistan  2020 2020-01-23       0      512.055098          0.357568   
2      Afghanistan  2020 2020-01-24       0      512.055098          0.357568   
3      Afghanistan  2020 2020-01-25       0      512.055098          0.357568   
4      Afghanistan  2020 2020-01-26       0      512.055098          0.357568   
...            ...   ...        ...     ...             ...               ...   
52090     Zimbabwe  2020 2020-12-27     349     1372.696674          0.336042   
52091     Zimbabwe  2020 2020-12-28     354     1372.696674          0.336042   
52092     Zimbabwe  2020 2020-12-29     359     1372.696674          0.336042   
52093     Zimbabwe  2020 2020-12-30     360     1372.696674          0.336042   
52094     Zimbabwe  2020 2020-12-31     363     1372.696674          0.336042   

      Democracy_Classificat

In [20]:
# Time series data with only COVID 19 - 2020 to 2022

print(covid_filtered)
# Export the final merged DataFrame to a CSV file
grouped_data.to_csv('cleaned_covid_data.csv', index=False)


             Date      Country  Deaths  Year  Month
0      2020-01-22  Afghanistan       0  2020      1
1      2020-01-23  Afghanistan       0  2020      1
2      2020-01-24  Afghanistan       0  2020      1
3      2020-01-25  Afghanistan       0  2020      1
4      2020-01-26  Afghanistan       0  2020      1
...           ...          ...     ...   ...    ...
231655 2022-01-18     Zimbabwe    5258  2022      1
231656 2022-01-19     Zimbabwe    5266  2022      1
231657 2022-01-20     Zimbabwe    5276  2022      1
231658 2022-01-21     Zimbabwe    5288  2022      1
231659 2022-01-22     Zimbabwe    5292  2022      1

[207888 rows x 5 columns]


In [None]:
# Dataset with COVID deaths, GDP, and democracy scores
# Filter COVID dataset to 2020
covid_2020 = covid_filtered[covid_filtered['Year'] == 2020]

# Reshape the GDP data to have a 'Year' column
gdp_melted = gdp_data.melt(id_vars='Country Name', value_vars=['2020', '2021', '2022'],
                           var_name='Year', value_name='GDP_per_Capita')

# Convert the 'Year' column to numeric
gdp_melted['Year'] = pd.to_numeric(gdp_melted['Year'])

# Rename 'Country Name' column
gdp_melted = gdp_melted.rename(columns={'Country Name': 'Country'})

# Merge the COVID dataset with the GDP dataset on 'Country' and 'Year'
merged_data = pd.merge(covid_2020, gdp_melted, how='inner', on=['Country', 'Year'])

# Rename columns in the democracy dataset
democracy_data = democracy_data.rename(columns={'country': 'Country', 'year': 'Year'})

# Filter democracy data for the year 2020 and select relevant columns
democracy_2020 = democracy_data[democracy_data['Year'] == 2020][['Country', 'Year',  'total_index_context', 'classification_context']]

# Ensure there are no leading spaces in 'Country' names
democracy_2020['Country'] = democracy_2020['Country'].str.strip()

# Merge the data with COVID and GDP with democracy scores on 'Country' and 'Year'
merged_data2 = pd.merge(merged_data, democracy_2020, how='inner', on=['Country', 'Year'])

# Group by 'Country' and 'Year'
grouped_data = merged_data2.groupby(['Country', 'Year']).agg({
    'Deaths': 'sum',  # sum deaths for each date in month
    'GDP_per_Capita': 'mean',  # Average GDP per capita
    'total_index_context': 'mean',  # Average democracy
    'classification_context': 'first', # Get the first classification context
}).reset_index()

# rename columns
grouped_data=grouped_data.rename(columns={'total_index_context': 'Democracy_Scores', 'classification_context': 'Democracy_classification'})

# Export the final merged DataFrame to a CSV file
grouped_data.to_csv('merged_covid_gdp_democracy.csv', index=False)





