## Setup

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Source locations of the two monthly CSV datasets.
temp_url = "https://bit.ly/monthly_temp"
co2_url = "https://bit.ly/monthly_CO2"

# Read each file, parsing its 'Date' column into datetimes while loading.
temp_df = pd.read_csv(temp_url, parse_dates=["Date"])
co2_df = pd.read_csv(co2_url, parse_dates=["Date"])

# Preview the first rows of each frame under its own heading.
print("Temperature data:", temp_df.head(), sep="\n")
print("\nCO2 data:", co2_df.head(), sep="\n")

## Task 1: Data Preparation
1. Set the ‘Date’ column as the index for both dataframes.
2. Ensure that there are no missing values in either dataset.

In [None]:
# Promote the 'Date' column to the index of both frames, modifying them in place.
for frame in (temp_df, co2_df):
    frame.set_index("Date", inplace=True)

In [None]:
# Show both frames (temperature first, then CO2) to confirm the new index.
for frame in (temp_df, co2_df):
    print(frame)

In [None]:
# Report the number of missing values per column, temperature first, then CO2.
for frame in (temp_df, co2_df):
    missing_per_column = frame.isnull().sum()
    print(missing_per_column)

## Task 2: Joining Datasets
1. Merge the temperature and CO2 datasets based on their date index.
2. Handle any missing values that may have been introduced by the merge.
3. Create some plots showing temperature anomalies and CO2 concentrations over time using pandas built-in plotting functions.

In [None]:
# Inner-join the two frames on 'Date', keeping only dates present in both,
# then print a summary of the merged frame.
merged_df = temp_df.merge(co2_df, how="inner", on="Date")
merged_df.info()

In [None]:
# Count missing values per column of the merged frame.
merged_df.isna().sum()

In [None]:
# Plot each series against the frame's index with pandas' built-in plotting:
# CO2 concentration first, then the temperature anomaly.
for column in ("CO2Concentration", "MonthlyAnomaly"):
    merged_df.plot(y=column)

## Task 3: Time Series Analysis
1. Resample the data to annual averages.
2. Calculate the year-over-year change in temperature anomalies and CO2 concentrations.
3. Create a scatter plot (use the plt.scatter() function) of annual temperature anomalies vs CO2 concentrations.

In [None]:
# Annual averages: group the rows by the calendar year of the Date index and
# average every numeric column. groupby() does not modify merged_df, so no
# defensive copy is needed. numeric_only=True keeps this cell runnable when it
# is re-executed after the text 'Season' column has been added to merged_df
# (a string column cannot be averaged).
annual_avgs = merged_df.groupby(merged_df.index.year).mean(numeric_only=True)
print(annual_avgs)

In [None]:
# def year_over_year(df):
#     anomaly_year = annual_avgs['MonthlyAnomaly'].diff
#     CO2_year = annual_avgs['CO2Concentration'].diff
#     return anomaly_year, CO2_year 

# merged_df['year_over_year'] = merged_df.index.to_series.apply(diff)

In [None]:
# Year-over-year change = difference between consecutive annual averages.
# Maps each new column name to the annual-average column it is derived from.
yoy_sources = {
    'year_over_year_anomaly': 'MonthlyAnomaly',
    'year_over_year_co2': 'CO2Concentration',
}
for yoy_column, source_column in yoy_sources.items():
    annual_avgs[yoy_column] = annual_avgs[source_column].diff()
annual_avgs.head()

In [None]:
#year_over_year(2020)

In [None]:
# Scatter of the annual mean temperature anomaly against the annual mean CO2
# concentration. The task asks for the annual values themselves, not their
# year-over-year changes, so the averaged columns are plotted directly.
plt.scatter(x=annual_avgs['CO2Concentration'], y=annual_avgs['MonthlyAnomaly'])
plt.xlabel('Annual mean CO2 concentration')
plt.ylabel('Annual mean temperature anomaly')
plt.title('Annual temperature anomaly vs CO2 concentration')

## Task 4: Seasonal Analysis
1. Create a function to extract the season from a given date (hint: use the date.month attribute and if-elif-else to assign the season in your function).
2. Use the function to create a new column called `Season`.
3. Calculate the average temperature anomaly and CO2 concentration for each season.
4. Create a box plot (use sns.boxplot) showing the distribution of temperature anomalies for each season.

In [None]:
def season_date(date):
    """Return the season name for the month of ``date``.

    Months map to seasons as follows: March-May -> "Spring",
    June-August -> "Summer", September-November -> "Fall",
    December-February -> "Winter".

    Args:
        date: Any object exposing an integer ``month`` attribute
            (for example ``datetime.date`` or ``pandas.Timestamp``).

    Returns:
        One of "Spring", "Summer", "Fall" or "Winter".

    Raises:
        ValueError: If ``date.month`` is not one of the months 1-12.
    """
    month = date.month
    if month in (3, 4, 5):
        return "Spring"
    if month in (6, 7, 8):
        return "Summer"
    if month in (9, 10, 11):
        return "Fall"
    if month in (12, 1, 2):
        return "Winter"
    # Fail with a clear message instead of falling through to an unbound
    # local variable when the month is not a valid calendar month.
    raise ValueError(f"cannot determine season: invalid month {month!r}")

In [None]:
# Label every row with the season of its index date.
season_labels = [season_date(timestamp) for timestamp in merged_df.index]
merged_df['Season'] = season_labels
merged_df.head()

In [None]:
# Average temperature anomaly and CO2 concentration for each season.
# The columns are selected explicitly before averaging: a column name passed
# positionally to mean() lands in its numeric_only parameter and selects
# nothing, and a single call covers both columns.
season_columns = ['MonthlyAnomaly', 'CO2Concentration']
merged_df_group = merged_df.groupby('Season')[season_columns].mean()

merged_df_group.head()

In [None]:
# Box plot of the temperature anomalies, one box per season.
# It is drawn from the row-level data in merged_df rather than the per-season
# means, because a distribution needs every observation. The column is named
# as a string; a bare MonthlyAnomaly would be an undefined variable.
sns.boxplot(
    data=merged_df,
    x='Season',
    y='MonthlyAnomaly',
    order=['Winter', 'Spring', 'Summer', 'Fall'],
)