# **Regression**
## Predicting the actual carbon intensity given the forecasted value

In [None]:
import requests
import datetime
import json

def fetch_carbon_intensity_data(start_date, end_date, timeout=30):
    """Download carbon intensity records for a time window.

    Sends a GET request to ``https://api.carbonintensity.org.uk/intensity/``
    followed by ``{start_date}/{end_date}`` and asks for a JSON response.

    Args:
        start_date: Start of the window, already formatted as the string to
            place in the URL (the caller uses ``%Y-%m-%dT%H:%M:%SZ``).
        end_date: End of the window, formatted the same way.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        otherwise ``None`` (a diagnostic message is printed first).
    """
    base_url = "https://api.carbonintensity.org.uk/intensity/"
    url = f"{base_url}{start_date}/{end_date}"
    headers = {'Accept': 'application/json'}

    # Network-level failures (DNS, refused connection, timeout) follow the
    # same "print and return None" contract as a bad status code.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    # A 200 response with a non-JSON body makes .json() raise a ValueError
    # subclass; report it instead of crashing the caller.
    try:
        return response.json()
    except ValueError as exc:
        print(f"Invalid JSON in response: {exc}")
        return None

if __name__ == "__main__":
    # Read the clock once so both ends of the window are derived from the
    # same instant. A timezone-aware "now" replaces datetime.utcnow(), which
    # is deprecated; the formatted strings are the same as before.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

    # Define the date range (the 14 days leading up to now)
    end_date = now_utc.strftime(timestamp_format)
    start_date = (now_utc - datetime.timedelta(days=14)).strftime(timestamp_format)

    # Fetch the data
    data = fetch_carbon_intensity_data(start_date, end_date)

    # Save the data to a JSON file in the current working directory
    if data:
        with open('carbon_intensity_data.json', 'w') as file:
            json.dump(data, file, indent=4)
        print("Data saved to 'carbon_intensity_data.json'")
    else:
        print("Failed to fetch data.")



In [26]:
import pandas as pd

# Read the file from the same relative location the fetch step writes it to,
# rather than a hard-coded absolute '/content/...' path that only exists when
# the working directory happens to be /content.
file_path = 'carbon_intensity_data.json'

# Read the JSON file into a DataFrame; the records sit under the top-level
# 'data' key, one dict per row.
raw_json_df = pd.read_json(file_path)

# Expand each record dict into its own row of columns
df = pd.DataFrame(raw_json_df['data'].tolist())
print(df)


                  from                 to  \
0    2023-08-07T20:00Z  2023-08-07T20:30Z   
1    2023-08-07T20:30Z  2023-08-07T21:00Z   
2    2023-08-07T21:00Z  2023-08-07T21:30Z   
3    2023-08-07T21:30Z  2023-08-07T22:00Z   
4    2023-08-07T22:00Z  2023-08-07T22:30Z   
..                 ...                ...   
666  2023-08-21T17:00Z  2023-08-21T17:30Z   
667  2023-08-21T17:30Z  2023-08-21T18:00Z   
668  2023-08-21T18:00Z  2023-08-21T18:30Z   
669  2023-08-21T18:30Z  2023-08-21T19:00Z   
670  2023-08-21T19:00Z  2023-08-21T19:30Z   

                                             intensity  
0    {'forecast': 176, 'actual': 173, 'index': 'mod...  
1    {'forecast': 174, 'actual': 177, 'index': 'mod...  
2    {'forecast': 172, 'actual': 164, 'index': 'mod...  
3    {'forecast': 148, 'actual': 126, 'index': 'mod...  
4     {'forecast': 144, 'actual': 110, 'index': 'low'}  
..                                                 ...  
666  {'forecast': 163, 'actual': 167, 'index': 'mod...  
667

In [27]:
# Feature Engineering

# Expand the per-row 'intensity' dicts into columns once instead of unpacking
# them with a separate lambda for every feature. A missing (None) value turns
# into NaN here rather than raising TypeError inside a lambda, and the rows
# affected are removed by the later dropna step.
# NOTE(review): the API documentation suggests 'actual' can be null for
# periods without a measured value yet - confirm.
intensity_fields = pd.DataFrame(df['intensity'].tolist(), index=df.index)
forecast = intensity_fields['forecast']
actual = intensity_fields['actual']

# 1. Difference between forecasted and actual intensity
df['intensity_difference'] = forecast - actual

# 2. Rolling averages over 4 consecutive rows (2 hours of half-hourly data);
#    the first 3 rows have no full window and are therefore NaN
df['rolling_avg_forecast'] = forecast.rolling(window=4).mean()
df['rolling_avg_actual'] = actual.rolling(window=4).mean()

# 3. Time-based features taken from the start of each interval
df['from_datetime'] = pd.to_datetime(df['from'])
df['hour_of_day'] = df['from_datetime'].dt.hour
df['day_of_week'] = df['from_datetime'].dt.dayofweek  # Monday=0, Sunday=6

# Display the DataFrame with the new features
print(df[['from', 'intensity_difference', 'rolling_avg_forecast', 'rolling_avg_actual', 'hour_of_day', 'day_of_week']])

                  from  intensity_difference  rolling_avg_forecast  \
0    2023-08-07T20:00Z                     3                   NaN   
1    2023-08-07T20:30Z                    -3                   NaN   
2    2023-08-07T21:00Z                     8                   NaN   
3    2023-08-07T21:30Z                    22                167.50   
4    2023-08-07T22:00Z                    34                159.50   
..                 ...                   ...                   ...   
666  2023-08-21T17:00Z                    -4                129.25   
667  2023-08-21T17:30Z                   -19                141.25   
668  2023-08-21T18:00Z                   -19                150.50   
669  2023-08-21T18:30Z                   -26                155.50   
670  2023-08-21T19:00Z                   -13                154.00   

     rolling_avg_actual  hour_of_day  day_of_week  
0                   NaN           20            0  
1                   NaN           20            0  
2  

In [28]:
# Count missing values per column before cleaning
missing_before = df.isna().sum()
print(missing_before)

# Discard every row that has at least one missing value
df = df.dropna()

# Count again to confirm nothing is missing afterwards
missing_after = df.isna().sum()
print(missing_after)

from                    0
to                      0
intensity               0
intensity_difference    0
rolling_avg_forecast    3
rolling_avg_actual      3
from_datetime           0
hour_of_day             0
day_of_week             0
dtype: int64
from                    0
to                      0
intensity               0
intensity_difference    0
rolling_avg_forecast    0
rolling_avg_actual      0
from_datetime           0
hour_of_day             0
day_of_week             0
dtype: int64


In [29]:
# One-hot encode the 'index' band stored inside each intensity dict
index_labels = df['intensity'].apply(lambda x: x['index'])

# Always produce the three columns the model cell selects, even when one of
# those bands does not occur in the fetched window; any other band that does
# occur still gets its own column. Sorting keeps the alphabetical column
# order that get_dummies produces for plain strings.
required_levels = {'high', 'low', 'moderate'}
levels = sorted(required_levels | set(index_labels.dropna()))
index_dummies = pd.get_dummies(
    index_labels.astype(pd.CategoricalDtype(categories=levels)),
    prefix='index',
)

# Drop dummy columns left over from a previous run of this cell so that
# re-running it does not append duplicate 'index_*' columns.
stale_columns = [col for col in index_dummies.columns if col in df.columns]

# Concatenate the one-hot encoded columns to the original dataframe
df = pd.concat([df.drop(columns=stale_columns), index_dummies], axis=1)
print(df.columns)

Index(['from', 'to', 'intensity', 'intensity_difference',
       'rolling_avg_forecast', 'rolling_avg_actual', 'from_datetime',
       'hour_of_day', 'day_of_week', 'index_high', 'index_low',
       'index_moderate'],
      dtype='object')


In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.regularizers import l2
from keras.optimizers import Adam
import matplotlib.pyplot as plt

# Reproducibility: the train/test split is already seeded, but Keras weight
# initialisation and batch shuffling were not, so every run gave a different
# model. set_random_seed seeds Python, NumPy and the Keras backend together.
from keras.utils import set_random_seed

SEED = 42
set_random_seed(SEED)

# Data Preparation
# NOTE(review): the only inputs are the one-hot intensity bands; the numeric
# forecast is not used as a feature even though the notebook title says the
# prediction is made from the forecasted value - confirm this is intended.
feature_columns = ['index_moderate', 'index_high', 'index_low']
X = df[feature_columns].values
y = df['intensity'].apply(lambda x: x['actual']).values

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Normalize the data (statistics are fitted on the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Building
model = Sequential()
# Input width is taken from the data so it cannot drift from feature_columns
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))  # Input layer
model.add(Dense(32, activation='relu'))  # Hidden layer
model.add(Dense(1, activation='linear'))  # Output layer

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation loss and the final test loss come from the same rows.
# Consider validation_split on the training data if hyperparameters are tuned
# against this curve.
model.fit(X_train, y_train, epochs=100, batch_size=10, validation_data=(X_test, y_test))

# Evaluation
loss = model.evaluate(X_test, y_test)
print(f"Mean Squared Error on Test Set: {loss}")



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78