<a href="https://colab.research.google.com/github/hashanranasinghe/ML/blob/main/weather.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# Location of the weather CSV inside the Colab runtime's /content directory.
dataset_path = '/content/SriLanka_Weather_Dataset.csv'
df = pd.read_csv(dataset_path)

FileNotFoundError: [Errno 2] No such file or directory: '/content/SriLanka_Weather_Dataset.csv'

In [None]:
# Preview the first five rows of the loaded dataset.
df.head(5)

The dataset includes information such as:


* Time: The timestamp of each weather observation.

* Weather Code: A numerical code representing the weather conditions at the given time.

* Temperature: Maximum, minimum, and mean values of 2-meter temperature.

* Apparent Temperature: Maximum, minimum, and mean values of apparent temperature, which takes into account factors like wind chill or heat index.

* Sunrise and Sunset: The times of sunrise and sunset for each day.

* Shortwave Radiation: Sum of shortwave radiation received during the observation period.

* Precipitation: Total sum of precipitation, including rainfall and snowfall.

* Precipitation Hours: The duration of time with measurable precipitation.

* Wind Speed and Gusts: Maximum values of wind speed and wind gusts at 10 meters above ground level.

* Wind Direction: Dominant wind direction at 10 meters above ground level.

* Evapotranspiration: Reference evapotranspiration (ET0) based on the FAO Penman-Monteith equation.

* Latitude, Longitude, and Elevation: Geographic coordinates and elevation of each city.

* Country and City: Names of the country and city corresponding to each weather observation.




In [None]:
# Print the column names, non-null counts, and dtypes of the dataset.
df.info()

In [None]:
# Count the rows that repeat an earlier row in every column.
duplicate_rows = df.duplicated().sum()
print(duplicate_rows)

In [None]:
# Show pandas' default summary statistics for the dataset.
df.describe()

In [None]:
# Distinct values present in the 'country' column.
types_of_countries = pd.unique(df['country'])
print(types_of_countries)

In [None]:
# Distinct city names, followed by how many of them there are.
city_column = df['city']
types_of_cities = city_column.unique()
city_count = len(types_of_cities)
print(types_of_cities)
print(city_count)

In [None]:
# Line plot of every precipitation_sum reading against the city it belongs to.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(df['city'], df['precipitation_sum'], linewidth=2.0)

# Axis captions and title in one call; city names are rotated to stay legible.
ax.set(
    xlabel='City',
    ylabel='Precipitation Sum',
    title='Precipitation by City ',
)
plt.xticks(rotation=80)

plt.show()

In [None]:
# One row per city with its coordinates.
# drop_duplicates keeps the first row seen for each city, in order of first
# appearance, so this yields the same table as looking up the first
# latitude/longitude per unique city -- without re-filtering the whole frame
# twice for every city.
first_row_per_city = df.drop_duplicates(subset='city')
cities = (
    first_row_per_city[['city', 'latitude', 'longitude']]
    .rename(columns={'city': 'Cities'})
    .reset_index(drop=True)
)

# Plain-list views of the coordinates, kept so any other cell that reads
# these names keeps working.
latitudes = cities['latitude'].tolist()
longitudes = cities['longitude'].tolist()

In [None]:
# Fixed-size base map with zooming, scrolling and dragging switched off.
map_options = dict(
    width=600,
    height=400,
    location=[7.877083, 80.697917],
    tiles='cartodbpositronnolabels',
    zoom_start=7,
    zoom_control=False,
    scrollWheelZoom=False,
    dragging=False,
)
city_locations = folium.Map(**map_options)

# Draw one circle per city and attach the city name as its popup.
city_rows = zip(cities['Cities'], cities['latitude'], cities['longitude'])
for city_name, city_lat, city_lon in city_rows:
    marker = folium.Circle(location=[city_lat, city_lon], radius=2000)
    marker.add_child(folium.Popup(city_name))
    marker.add_to(city_locations)

# Last expression of the cell: renders the map in the notebook.
city_locations

In [None]:
# Parse the timestamps, then tag every row with its year-month period.
df['time'] = pd.to_datetime(df['time'])
df['year_month'] = df['time'].dt.to_period('M')

# Average temperature_2m_mean over all rows that fall in the same month.
rows_by_month = df.groupby('year_month')
monthly_avg_temp = rows_by_month['temperature_2m_mean'].mean().reset_index()

In [None]:
# Display the table of monthly average temperatures.
monthly_avg_temp

In [None]:
# Very wide canvas: there is one x tick per year-month.
plt.figure(figsize=(50, 15))

# Periods are converted to text so they can be used as categorical x values.
month_labels = monthly_avg_temp['year_month'].astype(str)
mean_temperatures = monthly_avg_temp['temperature_2m_mean']
plt.plot(month_labels, mean_temperatures, marker='o', linestyle='-', color='b')

plt.xlabel('Time (Year-Month)')
plt.ylabel('Average Temperature (°C)')
plt.title('Monthly Average Temperature Trend')
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()

# Render the figure.
plt.show()

In [None]:
# Rebuild the per-city table, this time holding each city's elevation.
# drop_duplicates keeps the first row seen for each city in order of first
# appearance, which matches taking the first elevation per unique city while
# scanning the frame once instead of once per city.
cities = (
    df.drop_duplicates(subset='city')[['city', 'elevation']]
    .rename(columns={'city': 'Cities'})
    .reset_index(drop=True)
)

# Plain-list view of the elevations, kept so any other cell that reads this
# name keeps working.
elevation = cities['elevation'].tolist()

In [None]:
# Display the per-city table built in the previous cell.
cities

In [None]:
# Bar chart of each city's elevation.
plt.figure(figsize=(10, 7))
plt.bar(cities['Cities'], cities['elevation'])
plt.xlabel('City')
plt.xticks(rotation=90)
plt.ylabel('Elevation (m)')
plt.title('Elevation of Cities')

# Make room for the vertical tick labels and render explicitly, as the other
# pyplot cells do; without show() the cell only echoes the title's Text repr
# and nothing is drawn when the code runs outside a notebook.
plt.tight_layout()
plt.show()

In [None]:
# Smallest and largest dominant wind direction in the dataset, shown as a
# (min, max) tuple.
wind_direction = df['winddirection_10m_dominant']
wind_direction.min(), wind_direction.max()

In [None]:
# Smallest and largest precipitation_hours value, shown as a (min, max) tuple.
precipitation_hours = df['precipitation_hours']
precipitation_hours.min(), precipitation_hours.max()

In [None]:
# Preview the first two rows of the dataset.
df.head(2)

In [None]:
def _circular_mean_deg(angles):
    """Return the circular mean of *angles* in degrees, within [0, 360).

    Directions wrap around at 360, so an arithmetic mean is misleading
    (350 and 10 would average to 180 instead of 0).  Each direction is
    treated as a unit vector and the direction of the averaged vector is
    returned.  Missing values are ignored; an empty input yields NaN.
    """
    radians = np.deg2rad(angles.dropna().to_numpy(dtype=float))
    if radians.size == 0:
        return np.nan
    mean_angle = np.arctan2(np.sin(radians).mean(), np.cos(radians).mean())
    degrees = np.rad2deg(mean_angle) % 360
    # A tiny negative angle can round up to exactly 360.0 after the modulo.
    return 0.0 if degrees == 360.0 else degrees


# Mean dominant wind direction for each distinct elevation value.
# NOTE(review): assumes winddirection_10m_dominant holds bearings in
# degrees -- confirm against the dataset description.
wind_direction_with_elevation = (
    df.groupby('elevation')['winddirection_10m_dominant']
    .agg(_circular_mean_deg)
    .reset_index()
)
wind_direction_with_elevation

In [None]:
# Re-check the columns and dtypes of the dataset at this point in the notebook.
df.info()

In [None]:
# Empirical cumulative distribution of the apparent_temperature_mean column.
fig, ax = plt.subplots(figsize=(10, 7))
apparent_temp_mean = df['apparent_temperature_mean']
sns.ecdfplot(data=apparent_temp_mean, ax=ax, color='blue', linewidth=2)

# Dashed, semi-transparent grid behind the curve.
ax.grid(True, linestyle='--', alpha=0.7)

# Axis captions and title.
ax.set_ylabel('Cumulative Probability', fontsize=14)
ax.set_xlabel('Apparent Temperature (°C)', fontsize=14)
ax.set_title('ECDF of Apparent Temperature Mean', fontsize=16)

# Tighten the layout of this figure, then render it.
fig.tight_layout()
plt.show()

In [None]:
# Remove the year_month column from the dataset.
df = df.drop(columns=['year_month'])

In [None]:
# Independent copy of just the timestamp, max wind speed and max apparent
# temperature columns.
wind_temp_columns = ['time', 'windspeed_10m_max', 'apparent_temperature_max']
year_with_wind_max = df[wind_temp_columns].copy()
year_with_wind_max

In [None]:
# Tag each row with the yearly period its timestamp falls in.
timestamps = year_with_wind_max['time']
year_with_wind_max['year'] = timestamps.dt.to_period('Y')
year_with_wind_max


In [None]:
# For every year, keep the highest wind speed and the highest apparent
# temperature observed in that year.
yearly_maxima = {
    'windspeed_10m_max': 'max',
    'apparent_temperature_max': 'max',
}
rows_by_year = year_with_wind_max.groupby('year')
max_windspeed_year = rows_by_year.agg(yearly_maxima).reset_index()

max_windspeed_year

In [None]:
# Yearly maxima of wind speed and apparent temperature on one shared axis.
plt.figure(figsize=(10, 7))

# Both series share the same x values: the year periods rendered as text.
year_labels = max_windspeed_year['year'].astype(str)

plt.plot(
    year_labels,
    max_windspeed_year['windspeed_10m_max'],
    marker='o', linestyle='-', color='blue', label='Max Wind Speed',
)
plt.plot(
    year_labels,
    max_windspeed_year['apparent_temperature_max'],
    marker='s', linestyle='--', color='red', label='Max Apparent Temperature',
)

plt.xlabel('Year', fontsize=14)
plt.ylabel('Value', fontsize=14)
plt.title('Max Wind Speed and Apparent Temperature Over the Years', fontsize=16)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)

plt.xticks(rotation=45)
plt.tight_layout()

plt.show()


In [None]:
# Final check of the dataset's columns and dtypes.
df.info()