In [3]:
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, sum as _sum

# Load the raw dataset into pandas.
file_path = '/content/World Energy Consumption.csv'
data = pd.read_csv(file_path)

# Keep only the columns of interest and drop every row that has a missing
# value in ANY of them.
# NOTE(review): population, gdp and biofuel_consumption are not used by the
# aggregation below, yet a gap in any of them still removes the row (and so
# changes the per-country averages) — confirm this is intended.
data = data[[
    "country", "year", "population", "gdp", "biofuel_consumption",
    "solar_share_elec", "wind_share_elec"
]].dropna()

# Start (or reuse) a Spark session for the aggregation.
spark = SparkSession.builder \
    .appName("Energy Consumption Optimization") \
    .getOrCreate()

# Everything that uses the session lives inside try/finally so the session is
# stopped even when one of the Spark steps raises.
try:
    # Convert the cleaned pandas frame to a Spark DataFrame.
    spark_data = spark.createDataFrame(data)

    # Average solar and wind electricity shares per country.
    # Ordering is by average solar share first; average wind share only
    # breaks ties between countries with an identical solar average.
    renewable_shares = spark_data.groupBy("country").agg(
        avg("solar_share_elec").alias("avg_solar_share"),
        avg("wind_share_elec").alias("avg_wind_share")
    ).orderBy(col("avg_solar_share").desc(), col("avg_wind_share").desc())

    # Keep the first ten countries of that ordering.
    top_renewable_countries = renewable_shares.limit(10)

    # Show results
    print("Top 10 Countries by Renewable Energy Share:")
    top_renewable_countries.show()

    # Save results to disk. The default save mode raises if the target
    # directory already exists, which made this cell fail on every re-run;
    # "overwrite" keeps the cell idempotent.
    output_path = "optimized_energy_results"
    top_renewable_countries.write.mode("overwrite").csv(
        f"{output_path}/top_countries", header=True
    )
finally:
    # Stop Spark session
    spark.stop()


Top 10 Countries by Renewable Energy Share:
+-----------+------------------+-------------------+
|    country|   avg_solar_share|     avg_wind_share|
+-----------+------------------+-------------------+
|      Italy|3.9041333333333332| 3.5147333333333335|
|      Spain|2.2370526315789476| 11.850736842105265|
| Luxembourg| 2.069551724137931| 3.9652758620689657|
|    Germany|1.7826296296296296|  5.390666666666667|
|     Greece|1.6276896551724136| 3.2833448275862067|
|      Japan|1.5212352941176472| 0.3737647058823529|
|     Cyprus|1.4255833333333332| 2.8690833333333337|
|    Romania|           1.22475|  5.566666666666667|
|Switzerland|1.2080909090909093|0.13345454545454546|
|    Denmark| 1.060230769230769|  31.31161538461538|
+-----------+------------------+-------------------+

