# Energy Consumption Data Analysis

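This notebook cleans and explores European energy production and consumption data, then compares energy sources using a simplified cost-effectiveness ratio to suggest how energy costs could be reduced.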

## Data Cleaning

In [None]:
import pandas as pd

# Load the dataset
# NOTE: placeholder path; point it at the real CSV before running.
energy_data_path = '/path/to/your/Europe_energy_data.csv'
energy_data = pd.read_csv(energy_data_path)

# Data Cleaning
# Keep only the columns in which at least half of the rows are populated.
# `thresh` is the minimum number of non-missing values a column needs to be
# kept and is documented as an int, so use ceil(n / 2) in integer arithmetic.
threshold = (len(energy_data) + 1) // 2
energy_data_cleaned = energy_data.dropna(thresh=threshold, axis=1)

# Fill the remaining gaps in numeric columns with that column's mean.
# `numeric_only=True` is required on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of silently skipping them. Non-numeric columns
# are left untouched because fillna() aligns the Series on column names.
# Reassigning (rather than inplace=True) keeps the cell idempotent and avoids
# chained-assignment warnings on the frame returned by dropna().
column_means = energy_data_cleaned.mean(numeric_only=True)
energy_data_cleaned = energy_data_cleaned.fillna(column_means)

# Check the cleaned data
energy_data_cleaned.info()

## Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Summary statistics
summary_statistics = energy_data_cleaned.describe()

# Pair plots and correlations are only defined for numeric columns. Selecting
# them explicitly keeps corr() working on pandas >= 2.0, where non-numeric
# columns raise instead of being dropped silently.
numeric_data = energy_data_cleaned.select_dtypes(include='number')

# Visualize data distributions on a 10% sample.
# A fixed random_state makes the figure reproducible across kernel restarts.
sns.pairplot(numeric_data.sample(frac=0.1, random_state=42))
plt.show()

# Plot correlations
correlation_matrix = numeric_data.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm')
plt.title('Correlation Matrix of Numeric Columns')
plt.show()

## Energy Cost and Availability Analysis

In [None]:
# Select relevant columns
relevant_columns = [
    'year', 'coal_prod_change_twh', 'gas_prod_change_twh',
    'oil_prod_change_twh', 'solar_consumption', 'wind_consumption'
]

# The cleaning step drops sparsely populated columns, so fail early with a
# readable message if any column needed here is no longer present.
missing_columns = [
    name for name in relevant_columns
    if name not in energy_data_cleaned.columns
]
if missing_columns:
    raise KeyError(f"Columns missing from cleaned data: {missing_columns}")

energy_cost_availability = energy_data_cleaned[relevant_columns]

# Average the rows of each year before plotting. The data may hold several
# rows per year (presumably one per country - TODO confirm); plotting the raw
# rows would connect points back and forth across repeated years.
yearly_means = energy_cost_availability.groupby('year').mean().sort_index()

# Plot energy production/consumption changes over years
plt.figure(figsize=(14, 8))
for column in relevant_columns[1:]:
    plt.plot(yearly_means.index, yearly_means[column], label=column)

plt.xlabel('Year')
plt.ylabel('Energy Production/Consumption (TWh)')
plt.title('Energy Production/Consumption Changes Over Years')
plt.legend()
plt.show()

# Compare average changes
# 'year' is excluded: it is the time axis, not an energy quantity, so its
# mean does not belong in the comparison.
average_changes = energy_cost_availability.drop(columns='year').mean()
print("Average changes in energy production/consumption:\n", average_changes)

## Optimization Recommendations

In [None]:
# Rank the energy sources with a deliberately simplified cost-effectiveness
# ratio (for illustration only): the average value of each source's column
# divided by an assumed cost per TWh. The highest ratio is reported.

# Arbitrary placeholder costs per TWh for each energy source
energy_costs = dict(coal=50, gas=60, oil=70, solar=40, wind=45)

# Column of `average_changes` that represents each energy source
source_columns = {
    'coal': 'coal_prod_change_twh',
    'gas': 'gas_prod_change_twh',
    'oil': 'oil_prod_change_twh',
    'solar': 'solar_consumption',
    'wind': 'wind_consumption',
}

# Cost-effectiveness = average value / cost per TWh, per source
cost_effectiveness = {
    source: average_changes[column_name] / energy_costs[source]
    for source, column_name in source_columns.items()
}

# Report the source with the largest ratio
best_energy_source = max(
    cost_effectiveness,
    key=lambda source: cost_effectiveness[source],
)
print(f"The most cost-effective energy source is: {best_energy_source}")