# Mini Project: Global Temperature Anomaly Forecasting

This project analyzes global temperature anomalies and attempts to forecast future trends using time-series analysis techniques (ARIMA).

**Objective:** Build a self-contained pipeline that fetches real data, checks for stationarity, and fits a forecast model.

**Data Source:** NASA GISTEMP (Global Land-Ocean Temperature Index).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import io
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA

def fetch_data():
    """Download the NASA GISTEMP global land-ocean table and parse it.

    Returns:
        A ``(frame, ok)`` tuple. On success ``frame`` is the parsed CSV as a
        DataFrame and ``ok`` is ``True``. If anything in the download or
        parsing raises, the error is printed and ``(None, False)`` is
        returned; this function does not build the synthetic fallback itself.
    """
    source_url = "https://data.giss.nasa.gov/gistemp/tabledata_v4/GLB.Ts+dSST.csv"
    print("Fetching GISTEMP data...")
    try:
        reply = requests.get(source_url, timeout=10)
        reply.raise_for_status()
        csv_text = reply.content.decode('utf-8')
        # The first line of the file is skipped before the header is read.
        table = pd.read_csv(io.StringIO(csv_text), skiprows=1)
    except Exception as err:
        print(f"Failed to fetch data: {err}")
        print("Generating synthetic data for demonstration...")
        return None, False
    return table, True

# 1. Fetch Data
df, success = fetch_data()

if success:
    # Clean up the dataframe
    # Keep only rows whose 'Year' parses as a number. The .copy() makes the
    # filtered frame independent of the original, so the column assignments
    # below do not raise pandas' SettingWithCopyWarning.
    df = df[pd.to_numeric(df['Year'], errors='coerce').notnull()].copy()
    df['Year'] = df['Year'].astype(int)

    monthly_cols = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Cells that are not numeric become NaN and are dropped further down.
    df[monthly_cols] = df[monthly_cols].apply(pd.to_numeric, errors='coerce')

    # Reshape to long format: one row per (year, month) instead of one row
    # per year. Done column-wise rather than with iterrows(), which is slow
    # and can hand back 'Year' as a float that pd.Timestamp rejects.
    long_df = df.melt(
        id_vars='Year',
        value_vars=monthly_cols,
        var_name='month',
        value_name='anomaly',
    )
    month_numbers = long_df['month'].map(
        {name: number for number, name in enumerate(monthly_cols, start=1)}
    )
    # Every observation is stamped on the first day of its month.
    long_df['date'] = pd.to_datetime(
        pd.DataFrame({'year': long_df['Year'], 'month': month_numbers, 'day': 1})
    )

    # melt() emits all Januaries, then all Februaries, ...; sort to restore
    # chronological order before dropping the missing months.
    ts_df = long_df.set_index('date')[['anomaly']].sort_index().dropna()
else:
    # Generate synthetic data: a linear trend plus Gaussian noise
    dates = pd.date_range(start="1880-01-01", end="2023-12-01", freq="MS")
    t = np.arange(len(dates))
    # Trend: 0.005 degrees per month
    trend = 0.005 * t
    # No seasonal component is added; only unseeded random noise (std 0.1),
    # so the synthetic series differs from run to run.
    noise = np.random.normal(0, 0.1, len(dates))
    anomaly = -0.5 + trend + noise
    ts_df = pd.DataFrame({'anomaly': anomaly}, index=dates)

print(f"Data loaded: {len(ts_df)} monthly observations from {ts_df.index.min().year} to {ts_df.index.max().year}")

# Plot the full monthly anomaly series.
plt.figure(figsize=(14, 6))
plt.plot(ts_df, label='Monthly Anomaly')
plt.title("Global Land-Ocean Temperature Index (Anomaly)")
# The degree sign is written as an escape so it survives any re-encoding of
# this file (a raw character here had been garbled into two characters).
plt.ylabel("Anomaly (\u00b0C)")
plt.legend()
plt.show()

## Stationarity Check (ADF Test)

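ARIMA models assume the series is stationary (constant mean and variance) once it has been differenced `d` times. We check the raw series using the Augmented Dickey-Fuller test, whose null hypothesis is that the series has a unit root (i.e., is non-stationary). If the p-value is > 0.05, we cannot reject that null hypothesis and treat the data as non-stationary.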

In [None]:
def check_stationarity(timeseries):
    """Run the Augmented Dickey-Fuller test on *timeseries* and print the outcome.

    Prints the test statistic and the p-value, followed by a one-line verdict:
    non-stationary when the p-value is above 0.05, stationary otherwise.
    Nothing is returned.
    """
    # Only the first two entries of the adfuller result are used here.
    adf_stat, p_value = adfuller(timeseries)[:2]
    print('ADF Statistic: %f' % adf_stat)
    print('p-value: %f' % p_value)
    verdict = (
        "Fail to reject the null hypothesis. Data is non-stationary."
        if p_value > 0.05
        else "Reject the null hypothesis. Data is stationary."
    )
    print(verdict)

# Run the ADF test on the 'anomaly' column of the monthly series; the result
# is printed by check_stationarity rather than returned.
check_stationarity(ts_df['anomaly'])

# If non-stationary, we likely need differencing (d parameter in ARIMA)

## Forecasting with ARIMA

We will fit an ARIMA model. Since there is a clear trend, we expect `d=1`.

In [None]:
# Split data: the final HOLDOUT_MONTHS observations are held out as the test
# set. A single constant keeps the split and the forecast horizon in sync.
HOLDOUT_MONTHS = 24  # Hold out last 2 years
if len(ts_df) <= HOLDOUT_MONTHS:
    raise ValueError(
        f"Need more than {HOLDOUT_MONTHS} observations to hold out a test set, "
        f"got {len(ts_df)}"
    )
train = ts_df.iloc[:-HOLDOUT_MONTHS]
test = ts_df.iloc[-HOLDOUT_MONTHS:]

# Fit ARIMA Model with order (p, d, q) = (5, 1, 0)
model = ARIMA(train, order=(5,1,0))
model_fit = model.fit()

print(model_fit.summary())

# Forecast exactly as many steps as there are held-out observations, so the
# forecast always lines up one-to-one with test.index in the plot below.
forecast_result = model_fit.get_forecast(steps=len(test))
forecast = forecast_result.predicted_mean
conf_int = forecast_result.conf_int()

# Plotting
plt.figure(figsize=(14, 7))
# Plot only the last 5 years (60 months) of training data for clarity
last_5_years = train.iloc[-60:]
plt.plot(last_5_years.index, last_5_years['anomaly'], label='Train (Last 5 Years)')
plt.plot(test.index, test['anomaly'], label='Actual')
plt.plot(test.index, forecast, label='Forecast', color='red')
# Shade the band between the first and second columns of conf_int.
plt.fill_between(test.index, conf_int.iloc[:, 0], conf_int.iloc[:, 1], color='pink', alpha=0.3)
plt.title("Temperature Anomaly Forecast (ARIMA)")
plt.legend()
plt.show()