In [3]:
import pandas as pd
import openpyxl as op

In [6]:
df = pd.read_csv('merged_data_inner.csv')
df.head()

Unnamed: 0,Country,Year,AverageTemperature,Code,Annual nitrous oxide emissions in CO₂ equivalents,Annual methane emissions in CO₂ equivalents,Annual CO₂ emissions
0,Afghanistan,1850,13.185427,AFG,223008.4,3594926.5,3520884.0
1,Afghanistan,1851,13.391073,AFG,227659.61,3615134.5,3561188.2
2,Afghanistan,1852,13.337948,AFG,232190.92,3635346.8,3596619.0
3,Afghanistan,1853,13.270833,AFG,236528.19,3655563.5,3630340.0
4,Afghanistan,1854,13.481042,AFG,240597.22,3675785.0,3662827.5


In [8]:
from sklearn.preprocessing import StandardScaler

# Standardize the CO2 and CH4 columns
scaler = StandardScaler()
df[['Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']] = scaler.fit_transform(df[['Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']])


In [16]:
# Correlation matrix
correlation_matrix = df[['AverageTemperature', 'Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']].corr()
print(correlation_matrix)


                                             AverageTemperature  \
AverageTemperature                                     1.000000   
Annual CO₂ emissions                                  -0.108433   
Annual methane emissions in CO₂ equivalents           -0.081471   

                                             Annual CO₂ emissions  \
AverageTemperature                                      -0.108433   
Annual CO₂ emissions                                     1.000000   
Annual methane emissions in CO₂ equivalents              0.851923   

                                             Annual methane emissions in CO₂ equivalents  
AverageTemperature                                                             -0.081471  
Annual CO₂ emissions                                                            0.851923  
Annual methane emissions in CO₂ equivalents                                     1.000000  


In [10]:
from scipy.signal import detrend

# Detrend the data
df['Temperature_detrended'] = detrend(df['AverageTemperature'])
df['CO2_detrended'] = detrend(df['Annual CO₂ emissions'])
df['CH4_detrended'] = detrend(df['Annual methane emissions in CO₂ equivalents'])

# Correlation matrix for detrended data
detrended_correlation_matrix = df[['Temperature_detrended', 'CO2_detrended', 'CH4_detrended']].corr()
print(detrended_correlation_matrix)


                       Temperature_detrended  CO2_detrended  CH4_detrended
Temperature_detrended               1.000000      -0.110089      -0.081803
CO2_detrended                      -0.110089       1.000000       0.852838
CH4_detrended                      -0.081803       0.852838       1.000000


In [12]:
import statsmodels.api as sm

# Prepare the independent variables (with a constant term for the intercept)
X = df[['Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']]
X = sm.add_constant(X)

# Dependent variable
y = df['AverageTemperature']

# Fit the model
model = sm.OLS(y, X).fit()

# Print the summary
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:     AverageTemperature   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     149.6
Date:                Wed, 29 May 2024   Prob (F-statistic):           2.64e-65
Time:                        22:23:10   Log-Likelihood:                -83585.
No. Observations:               24248   AIC:                         1.672e+05
Df Residuals:                   24245   BIC:                         1.672e+05
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------

In [13]:
# Group by country and calculate correlations for each group
country_corr = df.groupby('Country').apply(lambda group: group[['AverageTemperature', 'Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']].corr().iloc[0, 1:])

print(country_corr)


AverageTemperature  Annual CO₂ emissions  \
Country                                    
Afghanistan                     0.010731   
Albania                         0.282724   
Algeria                         0.679854   
Angola                          0.390454   
Argentina                       0.169801   
...                                  ...   
Venezuela                       0.773036   
Vietnam                         0.464098   
Yemen                           0.775064   
Zambia                          0.109239   
Zimbabwe                        0.707064   

AverageTemperature  Annual methane emissions in CO₂ equivalents  
Country                                                          
Afghanistan                                            0.576001  
Albania                                                0.456505  
Algeria                                                0.666195  
Angola                                                 0.628653  
Argentina                      

  country_corr = df.groupby('Country').apply(lambda group: group[['AverageTemperature', 'Annual CO₂ emissions', 'Annual methane emissions in CO₂ equivalents']].corr().iloc[0, 1:])


In [17]:
country_corr.to_excel('country_correlations.xlsx', index=False)