# Coding Colab

https://eds-217-essential-python.github.io/course-materials/coding-colabs/6b_advanced_data_manipulation.html

9/10/24

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Short links to the two monthly CSV files used throughout this notebook.
temp_url = "https://bit.ly/monthly_temp"
co2_url = "https://bit.ly/monthly_CO2"

# Read each file into its own DataFrame; parse_dates converts the 'Date'
# column to datetimes while loading.
temp_df = pd.read_csv(temp_url, parse_dates=['Date'])
co2_df = pd.read_csv(co2_url, parse_dates=['Date'])

# Preview the first rows of each table under a heading.
print("Temperature data:", temp_df.head(), sep="\n")
print("\nCO2 data:", co2_df.head(), sep="\n")

## Task 1: Data Preparation

In [None]:
# Promote the 'Date' column of each table to its index. inplace=True mutates
# the existing DataFrames rather than returning new ones, so this cell is
# meant to run once per load (afterwards 'Date' is no longer a column).
temp_df.set_index(keys='Date', inplace=True)
co2_df.set_index(keys='Date', inplace=True)

In [None]:
# Report how many missing entries each column has, first for the temperature
# table and then for the CO2 table.
print(temp_df.isna().sum())
print(co2_df.isna().sum())

## Task 2: Joining Datasets


In [None]:
# Combine the temperature and CO2 tables on "Date". An inner join keeps only
# the dates that appear in both tables.
merged_df = temp_df.merge(co2_df, how="inner", on="Date")
print(merged_df)

In [None]:
# Count missing values per column in the merged table. This only reports the
# counts; nothing is dropped or filled here.
print(merged_df.isna().sum())

In [None]:
# Plot temperature anomalies and CO2 concentrations over time with pandas'
# built-in plotting, using the date index as the x-axis.
# DataFrame.plot returns a matplotlib Axes object, so it is called directly
# rather than wrapped in print(), which would only print the Axes repr.
merged_df.plot(y='MonthlyAnomaly', use_index=True,
               title='Monthly temperature anomaly over time')
merged_df.plot(y='CO2Concentration', use_index=True,
               title='Monthly CO2 concentration over time')
plt.show()

## Task 3: Time Series Analysis

In [None]:
# Average every column within each calendar year. Grouping on index.year
# (instead of resampling) leaves the result indexed by the integer year,
# which lets later cells look rows up with plain year numbers.
annual_avg = merged_df.groupby(merged_df.index.year).mean()
annual_avg.head()

In [None]:
# Calculate the year-over-year change in temperature anomalies and CO2 concentrations.

def year_over_year(year1, year2, data=None):
    """Print and return the difference in annual means between two years.

    Both differences are computed as ``year1 - year2``, so pass the later
    year first to get the change going forward in time.

    Args:
        year1: Index label of the year to subtract from.
        year2: Index label of the year being subtracted.
        data: Table indexed by year with 'MonthlyAnomaly' and
            'CO2Concentration' columns. Defaults to the notebook-level
            ``annual_avg`` table.

    Returns:
        A tuple ``(anomaly_difference, co2_difference)``.

    Raises:
        KeyError: If either year is not present in the index.
    """
    if data is None:
        # Fall back to the notebook's global table, as the original did.
        data = annual_avg
    # .loc does a single label-based lookup instead of chained indexing.
    anomaly_year = data.loc[year1, 'MonthlyAnomaly'] - data.loc[year2, 'MonthlyAnomaly']
    co2_year = data.loc[year1, 'CO2Concentration'] - data.loc[year2, 'CO2Concentration']
    print(anomaly_year, co2_year)
    return anomaly_year, co2_year
    
    

In [None]:
# Year-over-year change for every year at once: each annual mean minus the
# previous row's annual mean. The first year has no predecessor, so its
# change is NaN.
annual_avg['monthly_y2y'] = annual_avg['MonthlyAnomaly'].diff(periods=1)
annual_avg['co2_y2y'] = annual_avg['CO2Concentration'].diff(periods=1)
annual_avg.head()

In [None]:
# Change from 2020 to 2021. year_over_year subtracts its second argument from
# its first, so the later year goes first; this gives the same sign as the
# .diff()-based y2y columns.
year_over_year(2021, 2020)

In [None]:
# Scatter plot of annual mean temperature anomaly against annual mean CO2
# concentration. This plots the annual levels themselves, not the
# year-over-year change columns.
plt.scatter(annual_avg['CO2Concentration'], annual_avg['MonthlyAnomaly'])
plt.xlabel('CO2Concentration (annual mean)')
plt.ylabel('MonthlyAnomaly (annual mean)')
plt.title('Annual temperature anomaly vs CO2 concentration')
plt.show()

## Task 4: Seasonal Analysis

In [None]:
# create a function to extract the season from a given date (hint: use the date.month attribute and if-elif-else to assign the season in your function).

# from merged_df import Date

def season(date):
    """Return the season name for the calendar month of ``date``.

    Seasons are assigned by whole months: December-February is "Winter",
    March-May is "Spring", June-August is "Summer" and September-November
    is "Fall".

    Args:
        date: Any object exposing an integer ``month`` attribute, such as
            ``datetime.date`` or ``pandas.Timestamp``.

    Returns:
        One of "Winter", "Spring", "Summer" or "Fall".

    Raises:
        ValueError: If ``date.month`` is not a month number from 1 to 12.
    """
    month = date.month

    if month in (12, 1, 2):
        return "Winter"
    if month in (3, 4, 5):
        return "Spring"
    if month in (6, 7, 8):
        return "Summer"
    if month in (9, 10, 11):
        return "Fall"
    # Fail loudly on an unexpected month instead of returning a non-string
    # value that would silently end up in the Season column.
    raise ValueError(f"cannot determine season for month {month!r}")


In [None]:
# Label every row with its season by applying season() to each entry of the
# date index, then preview the result.
merged_df['Season'] = [season(stamp) for stamp in merged_df.index]
merged_df.head(5)


In [None]:
# Average temperature anomaly and CO2 concentration for each season.
# The result is stored in its own variable so merged_df keeps its monthly
# rows for later cells. The columns are selected explicitly because the first
# positional argument of GroupBy.mean() is numeric_only, not a column name.
seasonal_avg = merged_df.groupby('Season')[['MonthlyAnomaly', 'CO2Concentration']].mean()
seasonal_avg

In [None]:
# Box plot of the temperature-anomaly distribution within each season.
# boxplot is a seaborn function that receives the DataFrame through `data`;
# it is not an attribute of the DataFrame. reset_index() makes 'Season'
# available as a column whether it is currently a column or the index.
sns.boxplot(data=merged_df.reset_index(), x='Season', y='MonthlyAnomaly')
plt.show()