In [None]:
!pip install tensorflow
!pip install numpy pandas matplotlib scikit-learn


^C
Collecting tensorflow
  Using cached tensorflow-2.17.0-cp310-cp310-win_amd64.whl.metadata (3.2 kB)
Collecting tensorflow-intel==2.17.0 (from tensorflow)
  Using cached tensorflow_intel-2.17.0-cp310-cp310-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.17.0->tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)


In [2]:
import pandas as pd

# Load the dataset
df = pd.read_csv('World Energy Consumption.csv')

# Select a specific country, e.g., 'United States'
country_data = df[df['country'] == 'United States'].copy()

# Per-source consumption columns that are summed into the modelling target.
# Adjust this list to match the columns present in the CSV.
energy_columns = [
    'biofuel_consumption',
    'coal_consumption',
    'gas_consumption',
    'oil_consumption',
    'nuclear_consumption',
    'hydro_consumption',
    'solar_consumption',
    'wind_consumption'
]

# Only use the sources that actually exist in this version of the dataset
available_energy_columns = [col for col in energy_columns if col in country_data.columns]
if not available_energy_columns:
    raise ValueError(
        'None of the expected energy consumption columns were found in the dataset: '
        f'{energy_columns}'
    )

# Create Total Energy Consumption by summing available energy sources.
# min_count=1 keeps a year as NaN when every source is missing; without it
# pandas sums an all-NaN row to 0.0, which would look like a genuine
# zero-consumption year and make the forward-fill below a no-op.
country_data['Total_Energy_Consumption'] = (
    country_data[available_energy_columns].sum(axis=1, min_count=1)
)

# Convert 'year' to datetime, use it as the index and order chronologically
country_data['year'] = pd.to_datetime(country_data['year'], format='%Y')
country_data = country_data.set_index('year').sort_index()

# Handle missing values: carry the last known total forward. The result is
# assigned back instead of calling fillna(method=..., inplace=True) on a column
# selection, which is deprecated and does not update the frame under
# copy-on-write.
country_data['Total_Energy_Consumption'] = country_data['Total_Energy_Consumption'].ffill()

# Years before the first recorded value have nothing to forward-fill from and
# are still NaN; drop them so no NaN reaches the scaler or the model.
country_data = country_data.dropna(subset=['Total_Energy_Consumption'])


ModuleNotFoundError: No module named 'pandas'

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Rescale the target column into the [0, 1] range. The fitted scaler is kept so
# that model outputs can later be mapped back to the original units.
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split made further down, so the test period's min/max influence
# the scaling of the training data.
scaler = MinMaxScaler(feature_range=(0, 1))

# fit_transform returns a bare NumPy array; wrap it in a DataFrame so the year
# index and the column name are retained.
scaled_data = pd.DataFrame(
    scaler.fit_transform(country_data[['Total_Energy_Consumption']]),
    index=country_data.index,
    columns=['Total_Energy_Consumption'],
)


In [1]:
def create_sequences(data, sequence_length):
    """Slice a series into overlapping input windows and next-step labels.

    Window ``k`` holds ``data[k : k + sequence_length]`` and its label is the
    value immediately after it, ``data[k + sequence_length]``.

    Parameters
    ----------
    data : array-like
        Observations ordered in time (anything ``np.asarray`` accepts).
    sequence_length : int
        Number of consecutive observations in each input window; must be >= 1.

    Returns
    -------
    tuple of np.ndarray
        ``(sequences, labels)`` where ``sequences`` has shape
        ``(n_samples, sequence_length, ...)`` and ``labels`` has shape
        ``(n_samples, ...)``, with ``n_samples = len(data) - sequence_length``.
        If ``data`` is too short to form a single window, both arrays are
        empty but keep those dimensions, so indexing ``sequences.shape[1]``
        still works for the caller.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(f'sequence_length must be >= 1, got {sequence_length}')

    data = np.asarray(data)
    n_samples = len(data) - sequence_length
    trailing_shape = data.shape[1:]

    if n_samples <= 0:
        # Not enough observations for even one (window, label) pair.
        empty_sequences = np.empty((0, sequence_length) + trailing_shape, dtype=data.dtype)
        empty_labels = np.empty((0,) + trailing_shape, dtype=data.dtype)
        return empty_sequences, empty_labels

    sequences = np.stack([data[start:start + sequence_length] for start in range(n_samples)])
    # Copy so the returned labels do not alias the caller's array.
    labels = data[sequence_length:].copy()
    return sequences, labels

# Window size: 10 consecutive years of data are used to predict the next year
sequence_length = 10

# Turn the scaled target series into (window, next value) training pairs
X, y = create_sequences(
    scaled_data['Total_Energy_Consumption'].values,
    sequence_length,
)

# Add a trailing axis of size 1 so X is 3-D, as fed to the LSTM below
X = X.reshape((X.shape[0], X.shape[1], 1))

print(f'Shape of X: {X.shape}')
print(f'Shape of y: {y.shape}')


NameError: name 'scaled_data' is not defined

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# masks and batch shuffling are repeatable across Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Define the LSTM model. The input shape is declared with an explicit Input
# object rather than `input_shape=` on the first layer, which Keras 3 (shipped
# with TensorFlow 2.17) discourages and warns about in Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X.shape[1], 1)),
    # Two stacked LSTM layers, each followed by dropout to limit overfitting.
    # The first returns the full sequence so the second LSTM receives one
    # vector per time step; the second returns only its final state.
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    # Single output unit: the next (scaled) value of the series
    Dense(units=1),
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Model summary
model.summary()


In [None]:
# Chronological 80/20 split: the first 80% of windows are used for training,
# the remaining (most recent) windows are held out for testing.
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Train the LSTM model; the held-out windows are passed as validation data so
# their loss is reported alongside the training loss in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
)


In [None]:
import matplotlib.pyplot as plt

# Predict on the test set (outputs are still in the scaler's 0-1 range)
predictions_scaled = model.predict(X_test)

# Map predictions and targets back to the original units. The unscaled targets
# get their own name: overwriting `y_test` would make this cell non-idempotent
# (a second run would inverse-transform already unscaled values) and would feed
# wrong-scale validation data to the training cell if it were re-run afterwards.
predictions = scaler.inverse_transform(predictions_scaled)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate RMSE in original units
rmse = np.sqrt(np.mean((predictions - y_test_actual) ** 2))
print(f'Root Mean Squared Error: {rmse}')

# The test labels are the last len(y_test) rows of the series. Slice from an
# explicit start position; a `[-n:]` slice would select the whole index when
# n == 0.
test_years = country_data.index[len(country_data.index) - len(y_test_actual):]

# Plot the results
plt.figure(figsize=(14, 6))
plt.plot(test_years, y_test_actual, color='blue', label='Actual Energy Consumption')
plt.plot(test_years, predictions, color='red', label='Predicted Energy Consumption')
plt.title('Actual vs Predicted Energy Consumption')
plt.xlabel('Year')
plt.ylabel('Energy Consumption (TWh)')
plt.legend()
plt.show()


In [None]:
def forecast_future_values(model, input_sequence, n_steps, scaler):
    """Forecast ``n_steps`` values by repeatedly feeding predictions back in.

    Parameters
    ----------
    model : object
        Must provide ``predict(x)``; it is called with an array of shape
        ``(1, window_length, 1)`` and element ``[0, 0]`` of its result is
        taken as the next value.
    input_sequence : array-like
        Collection of windows; only the last one (``input_sequence[-1]``) is
        used as the starting point.
    n_steps : int
        Number of successive values to predict.
    scaler : object
        Must provide ``inverse_transform``; it is applied to the collected
        predictions reshaped to a single column.

    Returns
    -------
    The result of ``scaler.inverse_transform`` on the ``(n_steps, 1)`` array of
    predictions.
    """
    window = input_sequence[-1]  # most recent window seeds the roll-forward
    scaled_forecast = []

    for _step in range(n_steps):
        # One sample, window.shape[0] time steps, one feature
        model_input = window.reshape((1, window.shape[0], 1))
        predicted = model.predict(model_input)
        scaled_forecast.append(predicted[0, 0])

        # Slide the window: drop the oldest value, append the new prediction.
        # Both parts are flattened, so the window is 1-D from here on.
        window = np.concatenate((np.ravel(window[1:]), np.ravel(predicted)))

    # Stack the predictions into a single column and undo the scaling
    forecast_column = np.array(scaled_forecast).reshape(-1, 1)
    return scaler.inverse_transform(forecast_column)

# Forecast the next 10 years
n_steps = 10

# Seed the forecast with the most recent observations of the scaled series.
# X_test[-1] must not be used here: the last window built by create_sequences
# stops one step before the final observation (that observation is its label),
# so forecasting from it would re-predict the last known year and every
# forecast would be plotted one year too late.
window_length = X_test.shape[1]
latest_window = (
    scaled_data['Total_Energy_Consumption']
    .values[-window_length:]
    .reshape(1, window_length, 1)
)
future_predictions = forecast_future_values(model, latest_window, n_steps, scaler)

# Create future dates: one per forecast step, starting the year after the last observation
last_year = country_data.index[-1]
future_years = pd.date_range(start=last_year + pd.DateOffset(years=1), periods=n_steps, freq='YS')

# Plot the forecast
plt.figure(figsize=(14, 6))
plt.plot(country_data.index, scaler.inverse_transform(scaled_data), label='Historical')
plt.plot(future_years, future_predictions, color='green', label='Forecasted')
plt.title('Future Energy Consumption Forecast')
plt.xlabel('Year')
plt.ylabel('Energy Consumption (TWh)')
plt.legend()
plt.show()

# Display the forecasted values
forecast_df = pd.DataFrame({'Year': future_years, 'Forecasted_Energy_Consumption': future_predictions.flatten()})
print(forecast_df)
