In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
power_generation_data = pd.read_csv(r"G:\learn\python_works\google_ee\rudra\datas\attatchments\power_generation.csv")

In [3]:
print(power_generation_data)

     Year  Power_generation
0    1981       27434.00000
1    1982       87417.00000
2    1983       82293.00000
3    1984      104012.00000
4    1985      174411.00000
..    ...               ...
115  2096       98660.73543
116  2097      111158.46920
117  2098       85868.76438
118  2099       92953.32481
119  2100      103062.77770

[120 rows x 2 columns]


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set font properties
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12

# Step 1: Read the CSV file
file_path = r"G:\learn\python_works\google_ee\rudra\datas\attatchments\power_generation.csv"
try:
    power_generation_data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: File at {file_path} not found. Please check the path.")
    exit()

# Step 2: Compute annual averages (data is already annual, no aggregation needed)
annual_data = power_generation_data

# Step 3: Split data into observed (1982–2022) and forecasted (2023–2100)
observed_data = annual_data[(annual_data['Year'] >= 1982) & (annual_data['Year'] <= 2022)]
forecasted_data = annual_data[(annual_data['Year'] >= 2023) & (annual_data['Year'] <= 2100)]

# Step 4: Perform linear regression and calculate R² across all data
def linear_regression_and_r2(x, y):
    coeffs = np.polyfit(x, y, 1)
    trend_line = np.poly1d(coeffs)
    y_mean = np.mean(y)
    ss_tot = np.sum((y - y_mean) ** 2)
    ss_res = np.sum((y - trend_line(x)) ** 2)
    r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
    return coeffs, r_squared, trend_line

if not annual_data.empty:
    coeffs, r2, trend = linear_regression_and_r2(annual_data['Year'], annual_data['Power_generation'])
    equation = f'y = {coeffs[0]:.4f}x + {coeffs[1]:.4f}\nR² = {r2:.4f}'
else:
    equation = "No data for Power Generation"
    trend = lambda x: np.full_like(x, np.nan)

# Step 5: Create DataFrame and save to CSV
if not annual_data.empty:
    annual_data.to_csv('annual_power_generation_trends_data.csv', index=False)
    print("Plotted data saved as 'annual_power_generation_trends_data.csv'.")
else:
    print("No data available to save to CSV.")

# Step 6: Create the plot
plt.figure(figsize=(10, 6))

# Plot observed and forecasted values with different colors
if not observed_data.empty:
    plt.plot(observed_data['Year'], observed_data['Power_generation'], color='blue', label='Observed (1982–2022)', linewidth=2)
if not forecasted_data.empty:
    plt.plot(forecasted_data['Year'], forecasted_data['Power_generation'], color='red', label='Forecasted (2023–2100)', linewidth=2)

# Plot trend line across all data
if not annual_data.empty:
    plt.plot(annual_data['Year'], trend(annual_data['Year']), color='black', linestyle='--', label='Trend Line', linewidth=1)

# Annotations for equation and R²
if not annual_data.empty:
    plt.text(0.02, 0.98, equation, transform=plt.gca().transAxes, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Labels and title
plt.xlabel('Year')
plt.ylabel('Power Generation (MWh)')
plt.title('Annual Power Generation with Trend Line (1982–2100)')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)

# Set y-axis range from 0 to 200,000 kWh
plt.ylim(0, 200000)

# Adjust layout and save
plt.tight_layout()
plt.savefig('annual_power_generation_trends.png', dpi=300, bbox_inches='tight')
plt.close()
print("Chart saved as 'annual_power_generation_trends.png'.")

Plotted data saved as 'annual_power_generation_trends_data.csv'.
Chart saved as 'annual_power_generation_trends.png'.


In [3]:
print(equation)

y = -165.1496x + 455655.7361
R² = 0.0278


In [5]:
import pandas as pd
import matplotlib.pyplot as plt

# Set font properties
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12

# Step 1: Read the CSV file
file_path = r"G:\learn\python_works\google_ee\rudra\datas\attatchments\power_generation.csv"
try:
    power_generation_data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: File at {file_path} not found. Please check the path.")
    exit()

# Step 2: Define periods
periods = {
    'Baseline': (1981, 2011),
    '2020s': (2011, 2040),
    '2050s': (2041, 2070),
    '2080s': (2071, 2100)
}

# Step 3: Calculate average power generation for each period
period_means = {}
for period_name, (start_year, end_year) in periods.items():
    period_data = power_generation_data[(power_generation_data['Year'] >= start_year) & (power_generation_data['Year'] <= end_year)]
    mean_power = period_data['Power_generation'].mean()
    period_means[period_name] = mean_power if not pd.isna(mean_power) else 0
    print(f"Average {period_name} Power Generation: {mean_power if not pd.isna(mean_power) else 'No data'}")

# Step 4: Create DataFrame and save to CSV
df = pd.DataFrame({'Period': list(period_means.keys()), 'Average Power Generation (kWh)': list(period_means.values())})
df.to_csv('power_generation_by_period_data.csv', index=False)
print("Plotted data saved as 'power_generation_by_period_data.csv'.")

# Step 5: Create the plot
plt.figure(figsize=(8, 6))
plt.bar(period_means.keys(), period_means.values(), color=['green', 'black', 'skyblue', 'white'], edgecolor='black')
plt.xlabel('Period')
plt.ylabel('Average Power Generation (kWh)')
plt.title('Average Power Generation by Period')
plt.grid(True, linestyle='--', alpha=0.7)

# Set y-axis range from 0 to 200,000 kWh
plt.ylim(0, 200000)

# Adjust layout and save
plt.tight_layout()
plt.savefig('power_generation_by_period.png', dpi=300, bbox_inches='tight')
plt.close()
print("Chart saved as 'power_generation_by_period.png'.")

Average Baseline Power Generation: 137998.61290322582
Average 2020s Power Generation: 107479.07352833333
Average 2050s Power Generation: 114878.09475433333
Average 2080s Power Generation: 114492.24813066669
Plotted data saved as 'power_generation_by_period_data.csv'.
Chart saved as 'power_generation_by_period.png'.


In [6]:
import pandas as pd
import matplotlib.pyplot as plt

# Set font properties
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12

# Step 1: Read the CSV file
file_path = r"G:\learn\python_works\google_ee\rudra\datas\attatchments\power_generation.csv"
try:
    power_generation_data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: File at {file_path} not found. Please check the path.")
    exit()

# Step 2: Define periods
periods = {
    'Baseline': (1981, 2011),
    '2020s': (2011, 2040),
    '2050s': (2041, 2070),
    '2080s': (2071, 2100)
}

# Step 3: Calculate average power generation and relative change
period_means = {}
baseline_mean = power_generation_data[(power_generation_data['Year'] >= periods['Baseline'][0]) & (power_generation_data['Year'] <= periods['Baseline'][1])]['Power_generation'].mean()
relative_changes = {}

for period_name, (start_year, end_year) in periods.items():
    if period_name == 'Baseline':
        period_means[period_name] = baseline_mean
    else:
        period_data = power_generation_data[(power_generation_data['Year'] >= start_year) & (power_generation_data['Year'] <= end_year)]
        period_mean = period_data['Power_generation'].mean()
        period_means[period_name] = period_mean if not pd.isna(period_mean) else 0
        relative_changes[period_name] = ((period_mean - baseline_mean) / baseline_mean * 100) if not pd.isna(period_mean) and baseline_mean != 0 else 0
    print(f"Average {period_name} Power Generation: {period_means[period_name] if not pd.isna(period_means[period_name]) else 'No data'}")

# Step 4: Create DataFrame and save to CSV
df = pd.DataFrame({
    'Period': list(period_means.keys()),
    'Average Power Generation (kWh)': list(period_means.values()),
    'Relative Change (%)': ['N/A'] + [relative_changes.get(p, 0) for p in ['2020s', '2050s', '2080s']]
})
df.to_csv('power_generation_relative_change_data.csv', index=False)
print("Plotted data saved as 'power_generation_relative_change_data.csv'.")

# Step 5: Create the plot
plt.figure(figsize=(8, 6))
x = np.arange(len(period_means))
plt.bar(x, [period_means[p] for p in period_means.keys()], color=['green', 'black', 'skyblue', 'white'], edgecolor='black', label='Average Power Generation')
plt.xlabel('Period')
plt.ylabel('Average Power Generation (kWh)')
plt.title('Average Power Generation and Relative Change by Period (Baseline: 1981–2011)')
plt.xticks(x, period_means.keys())
plt.grid(True, linestyle='--', alpha=0.7)

# Annotate relative changes for future periods
for i, period in enumerate(['2020s', '2050s', '2080s']):
    change = relative_changes.get(period, 0)
    plt.text(i + 1, period_means[period] + 5000, f'{change:.1f}%', ha='center')

# Set y-axis range from 0 to 200,000 kWh
plt.ylim(0, 200000)

# Adjust layout and save
plt.tight_layout()
plt.savefig('power_generation_relative_change.png', dpi=300, bbox_inches='tight')
plt.close()
print("Chart saved as 'power_generation_relative_change.png'.")

Average Baseline Power Generation: 137998.61290322582
Average 2020s Power Generation: 107479.07352833333
Average 2050s Power Generation: 114878.09475433333
Average 2080s Power Generation: 114492.24813066669
Plotted data saved as 'power_generation_relative_change_data.csv'.
Chart saved as 'power_generation_relative_change.png'.
