# Data Pre-Processing

In [None]:
import pandas as pd

# Load the three source tables from the /content directory, in the order:
# CO2 emissions, greenhouse gases, surface temperature change.
co2Data, ggData, tempData = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/CO2Data.csv',
        '/content/GreenhouseGasses.csv',
        '/content/TempData.csv',
    )
)

In [None]:
# Preview the first rows of the greenhouse-gas table.
ggData.head()

Unnamed: 0,Year,CO2,CH4,N2O,CFC*,HCFCs,HFCs*,Total,Total.1,1990 = 1,change **
0,1979,1.027,0.406,0.104,0.154,0.008,0.001,1.7,382,0.785,
1,1980,1.06,0.413,0.104,0.163,0.009,0.001,1.749,386,0.808,2.3
2,1981,1.079,0.42,0.107,0.172,0.009,0.001,1.788,388,0.825,1.8
3,1982,1.091,0.426,0.111,0.18,0.01,0.001,1.819,391,0.84,1.5
4,1983,1.117,0.429,0.113,0.19,0.011,0.001,1.861,394,0.859,1.9


In [None]:
# Preview the first rows of the surface-temperature table.
tempData.head()

Unnamed: 0,Year,Value
0,1850,-0.22
1,1851,-0.19
2,1852,-0.38
3,1853,-0.18
4,1854,-0.22


In [None]:
# Preview the first rows of the CO2 table.
co2Data.head()

Unnamed: 0,year,mean,unc
0,1959,315.98,0.12
1,1960,316.91,0.12
2,1961,317.64,0.12
3,1962,318.45,0.12
4,1963,318.99,0.12


In [None]:
# Summary statistics (count, mean, std, min/max, quartiles) for every CO2 column.
co2Data.describe()

Unnamed: 0,year,mean,unc
count,64.0,64.0,64.0
mean,1990.5,358.295156,0.12
std,18.618987,30.583707,0.0
min,1959.0,315.98,0.12
25%,1974.75,330.895,0.12
50%,1990.5,355.075,0.12
75%,2006.25,382.5725,0.12
max,2022.0,418.56,0.12


In [None]:
# Per-column NaN counts for each of the three datasets
co2_missing_values, gg_missing_values, temp_missing_values = (
    frame.isnull().sum() for frame in (co2Data, ggData, tempData)
)

# Report the counts, one labelled section per dataset
for heading, counts in (
    ("CO2 Emissions - Missing Values:", co2_missing_values),
    ("\nGreenhouse Gases - Missing Values:", gg_missing_values),
    ("\nSurface Temperature Change - Missing Values:", temp_missing_values),
):
    print(heading)
    print(counts)

CO2 Emissions - Missing Values:
year    0
mean    0
unc     0
dtype: int64

Greenhouse Gases - Missing Values:
Year         0
CO2          0
CH4          0
N2O          0
CFC*         0
HCFCs        0
HFCs*        0
Total        0
Total.1      0
1990 = 1     0
change **    1
dtype: int64

Surface Temperature Change - Missing Values:
Year     0
Value    0
dtype: int64


In [None]:
# Replace missing entries with the mean of their column.
# `numeric_only=True` restricts the means to numeric columns, so this cell keeps
# working if a frame ever contains a non-numeric column (with the default
# arguments DataFrame.mean() raises TypeError on such frames in pandas 2.x).
# For all-numeric frames the result is unchanged.
# NOTE(review): per the missing-value report above, the only gap is the first
# row of `change **` in ggData; confirm that a column-mean fill is the intended
# treatment for that value.
co2Data = co2Data.fillna(co2Data.mean(numeric_only=True))
ggData = ggData.fillna(ggData.mean(numeric_only=True))
tempData = tempData.fillna(tempData.mean(numeric_only=True))

In [None]:
# Give the CO2 table the same key column name ('Year') as the other two tables.
co2Data = co2Data.rename(columns={'year': 'Year'})

# Restrict every table to rows whose Year lies in the inclusive range 1974-2021.
ggData, tempData, co2Data = (
    frame[(frame['Year'] >= 1974) & (frame['Year'] <= 2021)]
    for frame in (ggData, tempData, co2Data)
)

# Join on Year: greenhouse gases with temperature first, then the CO2 table.
df = pd.merge(pd.merge(ggData, tempData, on='Year'), co2Data, on='Year')

In [None]:
# Raw column name -> descriptive column name for the merged frame.
rename_dict = {
    # columns originating from the greenhouse-gas table
    'Year': 'Year',
    'CO2': 'CO2',
    'CH4': 'Methane',
    'N2O': 'Nitrous_Oxide',
    'CFC*': 'CFCs',
    'HCFCs': 'Hydrochlorofluorocarbons',
    'HFCs*': 'Hydrofluorocarbons',
    'Total': 'Total_Greenhouse_Gases',
    'Total.1': 'Total_Greenhouse_Gases_Scaled',
    '1990 = 1': '1990_Equals_1',
    'change **': 'Change',
    # column originating from the temperature table
    'Value': 'Surface_Temperature',
    # columns originating from the CO2 table
    'mean': 'CO2_Mean',
    'unc': 'CO2_Uncertainty'
}

# Apply the new names, then discard the CO2 uncertainty column.
df = df.rename(columns=rename_dict).drop(columns=['CO2_Uncertainty'])

In [None]:
# Summary statistics for every column of the merged, renamed frame.
df.describe()

Unnamed: 0,Year,CO2,Methane,Nitrous_Oxide,CFCs,Hydrochlorofluorocarbons,Hydrofluorocarbons,Total_Greenhouse_Gases,Total_Greenhouse_Gases_Scaled,1990_Equals_1,Change,Surface_Temperature,CO2_Mean
count,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
mean,2000.911111,1.570311,0.475667,0.154622,0.251378,0.034044,0.014711,2.500689,445.244444,1.154733,1.677273,0.576444,374.498444
std,12.990945,0.343749,0.030469,0.032262,0.034604,0.017099,0.014129,0.452937,37.559744,0.209166,0.372866,0.276703,24.544047
min,1979.0,1.027,0.406,0.104,0.154,0.008,0.001,1.7,382.0,0.785,0.7,0.1,336.84
25%,1990.0,1.294,0.459,0.129,0.246,0.018,0.003,2.166,417.0,1.0,1.5,0.37,354.45
50%,2001.0,1.538,0.481,0.153,0.263,0.034,0.009,2.494,443.0,1.152,1.7,0.57,371.32
75%,2012.0,1.848,0.494,0.181,0.276,0.051,0.025,2.86,475.0,1.321,1.8,0.79,394.06
max,2021.0,2.14,0.526,0.21,0.282,0.058,0.044,3.222,508.0,1.488,2.7,1.14,416.45


In [None]:
# Count the missing values remaining in each column of the merged frame.
df.isnull().sum()

Year                             0
CO2                              0
Methane                          0
Nitrous_Oxide                    0
CFCs                             0
Hydrochlorofluorocarbons         0
Hydrofluorocarbons               0
Total_Greenhouse_Gases           0
Total_Greenhouse_Gases_Scaled    0
1990_Equals_1                    0
Change                           0
Surface_Temperature              0
CO2_Mean                         0
dtype: int64

In [None]:
# Nested-list copy of the merged frame: the header row of column names first,
# followed by one list per data row.
df_list = [list(df.columns), *df.values.tolist()]

In [None]:
# Show only the header row and the first five data rows; displaying the whole
# nested list floods the notebook with output.
df_list[:6]

[['Year',
  'CO2',
  'Methane',
  'Nitrous_Oxide',
  'CFCs',
  'Hydrochlorofluorocarbons',
  'Hydrofluorocarbons',
  'Total_Greenhouse_Gases',
  'Total_Greenhouse_Gases_Scaled',
  '1990_Equals_1',
  'Change',
  'Surface_Temperature',
  'CO2_Mean'],
 [1979.0,
  1.027,
  0.406,
  0.104,
  0.154,
  0.008,
  0.001,
  1.7,
  382.0,
  0.785,
  1.6772727272727268,
  0.1,
  336.84],
 [1980.0,
  1.06,
  0.413,
  0.104,
  0.163,
  0.009,
  0.001,
  1.749,
  386.0,
  0.808,
  2.3,
  0.33,
  338.76],
 [1981.0,
  1.079,
  0.42,
  0.107,
  0.172,
  0.009,
  0.001,
  1.788,
  388.0,
  0.825,
  1.8,
  0.38,
  340.12],
 [1982.0,
  1.091,
  0.426,
  0.111,
  0.18,
  0.01,
  0.001,
  1.819,
  391.0,
  0.84,
  1.5,
  0.22,
  341.48],
 [1983.0,
  1.117,
  0.429,
  0.113,
  0.19,
  0.011,
  0.001,
  1.861,
  394.0,
  0.859,
  1.9,
  0.25,
  343.15],
 [1984.0,
  1.141,
  0.432,
  0.116,
  0.198,
  0.012,
  0.002,
  1.901,
  397.0,
  0.878,
  1.9,
  0.13,
  344.87],
 [1985.0,
  1.164,
  0.437,
  0.118,
  0.20

# Least Squares Regression

In [None]:
!pip install pandas
!pip install numpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Operation cancelled by user[0m[31m
[0mTraceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 3108, in _dep_map
    return self.__dep_map
  File "/usr/local/lib/python3.10/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 2901, in __getattr__
    raise AttributeError(attr)
AttributeError: _DistInfoDistribution__dep_map

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 169, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py", line 242, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/commands/install.py", line 441

In [None]:
import pandas as pd

# Re-read the three source CSVs into separate frames (CO2 emissions,
# greenhouse gases, surface temperature change) ...
co2Data2 = pd.read_csv('/content/CO2Data.csv')
ggData2 = pd.read_csv('/content/GreenhouseGasses.csv')
tempData2 = pd.read_csv('/content/TempData.csv')

# ... and keep a plain nested-list copy of each frame's rows.
co2List, ggList, tempList = (
    frame.values.tolist() for frame in (co2Data2, ggData2, tempData2)
)

In [None]:
import numpy as np

def slope(x_coordinates, y_coordinates):
    """Fit the line ``y = m*x + b`` to paired samples by ordinary least squares.

    Parameters
    ----------
    x_coordinates, y_coordinates : array-like of numbers
        Paired samples of equal length. Lists, tuples and NumPy arrays are all
        accepted; both are converted to float arrays before any arithmetic, so
        plain Python sequences work and integer inputs cannot silently
        overflow in the products below.

    Returns
    -------
    tuple
        ``(m, b)``: the slope and the y-intercept of the fitted line.

    Notes
    -----
    If the denominator is zero (for example when every x value is identical,
    or fewer than two points are supplied) no exception is raised here; the
    result is whatever NumPy's floating-point division produces (nan/inf).
    """
    x = np.asarray(x_coordinates, dtype=float)
    y = np.asarray(y_coordinates, dtype=float)
    n = len(x)

    sum_x = np.sum(x)
    sum_y = np.sum(y)
    sum_xy = np.sum(x * y)
    sum_xx = np.sum(x * x)

    # Closed-form least-squares solution of the normal equations.
    gradient = (sum_xy * n - sum_x * sum_y) / (sum_xx * n - sum_x * sum_x)
    y_intercept = (sum_y - gradient * sum_x) / n

    return gradient, y_intercept

# Show the most recent data row so the reader can see where the observations end.
print(df_list[-1])
year = input("What year would you like to find data about?: ")
# Parse the answer once, before any prediction output, so a non-numeric entry
# fails immediately instead of after the header line has been printed.
target_year = float(year)
print("In the year "+year+" our model predicts that:")
# Display units, positionally aligned with the column names in df_list[0].
units = ["years","","","","","","","","","","","","ppm"]

# NOTE: despite the names, yValues holds the independent variable (column 0,
# the year) and xValues holds the quantity being predicted, which is why the
# fit is requested as slope(yValues, xValues).
data_rows = df_list[1:]  # everything after the header row
yValues = np.array([row[0] for row in data_rows])
for index1 in range(1, len(df_list[0])):
    xValues = np.array([row[index1] for row in data_rows])
    line = slope(yValues, xValues)  # (slope, intercept) of column vs. year
    prediction = line[0]*target_year + line[1]
    # A column with no entry in `units` is printed without a unit instead of
    # raising IndexError.
    unit = units[index1] if index1 < len(units) else ""
    print(str(df_list[0][index1])+" = "+str(round(prediction,3))+" "+unit)
    

(1.5182926829268293, 0.30487804878048763)
[2021.0, 2.14, 0.526, 0.21, 0.246, 0.058, 0.044, 3.222, 508.0, 1.488, 1.8, 0.79, 416.45]
What year would you like to find data about?: 2050
In the year 2050 our model predicts that:
CO2 = 2.866
Methane = 0.588
Nitrous_Oxide = 0.276
CFCs = 0.318
Hydrochlorofluorocarbons = 0.098
Hydrofluorocarbons = 0.065
Total_Greenhouse_Gases = 4.21
Total_Greenhouse_Gases_Scaled = 586.906
1990_Equals_1 = 1.944
Change = 1.603
Surface_Temperature = 1.499
CO2_Mean = 466.782


# AI