In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import numpy as np

# Load data
data = pd.read_csv('CSV/roba2.csv')

# Inspect the data
print(data.head())


  Country                 Policy  Year            Result  \
0     JPN  Feed_in_Tariff_System  2011   Initial success   
1     JPN                    NaN  2012  Growing adoption   
2     JPN                    NaN  2013      Major growth   
3     JPN                    NaN  2014  Continued growth   
4     JPN                    NaN  2015     Stable growth   

  Installed_Capacity_MW Cost_Reduction_LCOE_kWh New_Solar_Installation  \
0                52,742                     NaN                  1,296   
1                    32                    0.27                  1,718   
2                    29                    0.26                  6,967   
3                    26                    0.24                  9,740   
4                    25                    0.23                 10,811   

  Carbon_Emissions_Reduction (tons) Economic_Impact  \
0                           200,000        Moderate   
1                           400,000        Moderate   
2                           5

In [27]:
# Handling missing values
data.ffill(inplace=True)  # Forward fill to propagate previous values forward

# Verify if there are still missing values
print(data.isnull().sum())


Country                               0
Policy                                0
Year                                  0
Result                                0
Installed_Capacity_MW                 0
Cost_Reduction_LCOE_kWh               1
New_Solar_Installation                0
Carbon_Emissions_Reduction (tons)     0
Economic_Impact                       0
Public_Awareness_and_Acceptance       0
Disaster                             13
dtype: int64


In [22]:


# Prepare data for linear regression
X = data[['Year']]
y = data['Installed_Capacity_MW']

# Linear regression model
model = LinearRegression()
model.fit(X, y)

# Predict for future years
future_years = np.array([[2025], [2026], [2027]])  # Example years
predictions = model.predict(future_years)

# Print predictions
for year, prediction in zip(future_years, predictions):
    print(f"Predicted installed capacity in {year[0]}: {prediction:.2f} MW")

# Plot the results
plt.figure(figsize=(10,6))
plt.scatter(X, y, color='blue', label='Actual Data')
plt.plot(X, model.predict(X), color='red', label='Linear Fit')
plt.title('Solar Installed Capacity vs Year')
plt.xlabel('Year')
plt.ylabel('Installed Capacity (MW)')
plt.legend()
plt.show()


ValueError: Input y contains NaN.

In [18]:
# Trend analysis for LCOE reduction over the years
plt.figure(figsize=(10,6))
sns.lineplot(data=data, x='Year', y='Cost_Reduction_LCOE_$_kWh', hue='Country')
plt.title('Cost Reduction LCOE ($/kWh) Over Time')
plt.xlabel('Year')
plt.ylabel('Cost Reduction LCOE ($/kWh)')
plt.show()



ValueError: Could not interpret value `Cost_Reduction_LCOE_$_kWh` for parameter `y`

<Figure size 1000x600 with 0 Axes>

In [20]:
data['Cost_Reduction_LCOE_$_kWh'] = data['Cost_Reduction_LCOE_$_kWh'].str.replace(',', '')  # Remove commas
data['Cost_Reduction_LCOE_$_kWh'] = pd.to_numeric(data['Cost_Reduction_LCOE_$_kWh'], errors='coerce')  # Convert to numeric


# Trend analysis for LCOE reduction over the years
plt.figure(figsize=(10,6))
sns.lineplot(data=data, x='Year', y='Cost_Reduction_LCOE_$_kWh', hue='Country')
plt.title('Cost Reduction LCOE ($/kWh) Over Time')
plt.xlabel('Year')
plt.ylabel('Cost Reduction LCOE ($/kWh)')
plt.show()


KeyError: 'Cost_Reduction_LCOE_$_kWh'

In [None]:
# Convert categorical columns into numerical values for correlation
data['Economic_Impact'] = data['Economic_Impact'].map({'Low': 1, 'Moderate': 2, 'High': 3, 'Very high': 4})

# Calculate the correlation matrix
corr_matrix = data[['Installed_Capacity_MW', 'Cost_Reduction_LCOE_$_kWh', 'New_Solar_Installation', 'Carbon_Emissions_Reduction', 'Economic_Impact']].corr()

# Plot the correlation matrix
plt.figure(figsize=(8,6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()


In [None]:
# Future prediction for installed capacity
future_years = np.array([[2025], [2026], [2027], [2028], [2029], [2030]])
future_predictions = model.predict(future_years)

# Display the predictions
predictions_df = pd.DataFrame(future_years, columns=['Year'])
predictions_df['Predicted_Installed_Capacity_MW'] = future_predictions
print(predictions_df)
