# Install and import dependencies

In [212]:
# Importing necessary libraries

import os
import pandas as pd                     # Used for dataframes
import matplotlib.pyplot as plt         # Used for plotting
import matplotlib.dates as mdates       # Used for plotting and handling dates
import numpy as np                      # used for inserting line of regression

In [213]:
# Function to export graphs into a folder named 'output'

def graph_export(filename, fig=None, export_dir='output', dpi=300):
    """Save a matplotlib figure as ``export_dir/filename``.

    Parameters
    ----------
    filename : str
        Name of the file to write inside ``export_dir``.
    fig : figure object, optional
        Figure to save. When omitted, the current pyplot figure is used and
        nothing is written if that figure has no axes.
    export_dir : str, default 'output'
        Target directory. It is created (including parents) when missing.
        An empty string means "no directory" and skips the creation step.
    dpi : int, default 300
        Resolution forwarded to ``savefig``.

    Returns
    -------
    None
        Failures are reported with ``print`` rather than raised.
    """
    # exist_ok=True removes the check-then-create race and makes re-runs a no-op.
    if export_dir:
        try:
            os.makedirs(export_dir, exist_ok=True)
        except OSError as e:
            # Report the failure instead of returning silently, otherwise the
            # missing plot goes unnoticed.
            print(f"Error creating export directory '{export_dir}': {e}")
            return

    # Define the full path for the plot file
    full_path = os.path.join(export_dir, filename)

    try:
        fig_to_save = fig
        if fig_to_save is None:
            # Fall back to the current figure if no specific figure is provided
            fig_to_save = plt.gcf()
            if not fig_to_save.get_axes():  # Current figure has nothing drawn on it
                print("No active plot to save.")
                return
        fig_to_save.savefig(full_path, dpi=dpi)
    except Exception as e:
        print(f"Error saving plot to '{full_path}': {e}")

# Data Import and Cleansing

In [214]:
# Load the 2022 Los Angeles weather export into a dataframe and preview the first 5 rows

weather = pd.read_csv(
    'Los_Angeles_Weather_CA 2022-01-01 to 2022-12-31.csv'
)

weather.head()

Unnamed: 0,location,datetime,temp,feels_like,dew,humidity,precip,precip_prob,precip_type,snow,...,sea_level_pressure,cloud_cover,visibility,solar_radiation,solar_energy,uv_index,severe_risk,conditions,icon,stations
0,"Los Angeles,USA",2022-01-01T00:00:00,48.3,48.3,41.6,77.68,0.0,0,,0,...,1011.4,25.0,8.8,0.0,0.0,0,,Partially cloudy,partly-cloudy-night,KHHR:LAX:KBUR:KCQT:HHR:DTLA USC Campus
1,"Los Angeles,USA",2022-01-01T01:00:00,48.9,48.9,43.2,80.38,0.0,0,,0,...,1012.0,86.4,3.9,0.0,0.0,0,,Partially cloudy,partly-cloudy-night,KHHR:LAX:KBUR:KCQT:HHR:DTLA USC Campus
2,"Los Angeles,USA",2022-01-01T02:00:00,49.9,49.3,42.0,74.19,0.0,0,,0,...,1013.1,81.8,9.6,0.0,0.0,0,,Partially cloudy,partly-cloudy-night,KHHR:LAX:KBUR:KCQT:HHR:DTLA USC Campus
3,"Los Angeles,USA",2022-01-01T03:00:00,48.2,48.2,39.0,70.43,0.0,0,,0,...,1013.9,33.5,9.8,0.0,0.0,0,,Partially cloudy,partly-cloudy-night,KHHR:LAX:KBUR:KCQT:HHR:DTLA USC Campus
4,"Los Angeles,USA",2022-01-01T04:00:00,48.1,48.1,38.7,69.61,0.0,0,,0,...,1014.2,86.1,9.9,0.0,0.0,0,,Partially cloudy,partly-cloudy-night,KHHR:LAX:KBUR:KCQT:HHR:DTLA USC Campus


In [215]:
# Derives 'date', 'time' and 'hour' from 'datetime' while that column is still present.
# The guard keeps the cell re-runnable after 'datetime' has already been dropped.

if 'datetime' in weather.columns:
    weather['datetime'] = pd.to_datetime(weather['datetime'])   # Parse the raw strings into timestamps

    stamp = weather['datetime'].dt
    weather['date'] = stamp.date   # Calendar date part of the timestamp
    weather['time'] = stamp.time   # Clock time part of the timestamp
    weather['hour'] = stamp.hour   # Hour of day as an integer

# Removes the columns that are no longer needed; names already gone are ignored
weather = weather.drop(columns=['location', 'datetime', 'icon', 'stations'], errors='ignore')

# Moves the derived columns to the left of the dataframe for easier reading
for position, name in enumerate(['date', 'time', 'hour']):
    weather.insert(position, name, weather.pop(name))

weather.head()

Unnamed: 0,date,time,hour,temp,feels_like,dew,humidity,precip,precip_prob,precip_type,...,wind_speed,wind_dir,sea_level_pressure,cloud_cover,visibility,solar_radiation,solar_energy,uv_index,severe_risk,conditions
0,2022-01-01,00:00:00,0,48.3,48.3,41.6,77.68,0.0,0,,...,0.4,1.0,1011.4,25.0,8.8,0.0,0.0,0,,Partially cloudy
1,2022-01-01,01:00:00,1,48.9,48.9,43.2,80.38,0.0,0,,...,0.2,1.0,1012.0,86.4,3.9,0.0,0.0,0,,Partially cloudy
2,2022-01-01,02:00:00,2,49.9,49.3,42.0,74.19,0.0,0,,...,3.3,17.0,1013.1,81.8,9.6,0.0,0.0,0,,Partially cloudy
3,2022-01-01,03:00:00,3,48.2,48.2,39.0,70.43,0.0,0,,...,0.4,357.0,1013.9,33.5,9.8,0.0,0.0,0,,Partially cloudy
4,2022-01-01,04:00:00,4,48.1,48.1,38.7,69.61,0.0,0,,...,0.4,357.0,1014.2,86.1,9.9,0.0,0.0,0,,Partially cloudy


In [216]:
# Replaces every missing value (empty / NaN cell) with 0, in every column
# NOTE(review): the blanket fill also lands in text columns such as precip_type - confirm this is intended

weather.fillna(value=0, inplace=True)

weather.head()

Unnamed: 0,date,time,hour,temp,feels_like,dew,humidity,precip,precip_prob,precip_type,...,wind_speed,wind_dir,sea_level_pressure,cloud_cover,visibility,solar_radiation,solar_energy,uv_index,severe_risk,conditions
0,2022-01-01,00:00:00,0,48.3,48.3,41.6,77.68,0.0,0,0,...,0.4,1.0,1011.4,25.0,8.8,0.0,0.0,0,0.0,Partially cloudy
1,2022-01-01,01:00:00,1,48.9,48.9,43.2,80.38,0.0,0,0,...,0.2,1.0,1012.0,86.4,3.9,0.0,0.0,0,0.0,Partially cloudy
2,2022-01-01,02:00:00,2,49.9,49.3,42.0,74.19,0.0,0,0,...,3.3,17.0,1013.1,81.8,9.6,0.0,0.0,0,0.0,Partially cloudy
3,2022-01-01,03:00:00,3,48.2,48.2,39.0,70.43,0.0,0,0,...,0.4,357.0,1013.9,33.5,9.8,0.0,0.0,0,0.0,Partially cloudy
4,2022-01-01,04:00:00,4,48.1,48.1,38.7,69.61,0.0,0,0,...,0.4,357.0,1014.2,86.1,9.9,0.0,0.0,0,0.0,Partially cloudy


In [217]:
# Splitting the data into 3 categories: day, evening, night
# Day: 7 AM - 7 PM        (hours 7-18)
# Evening: 7 PM - 11 PM   (hours 19-22)
# Night: 11 PM - 7 AM     (hour 23 and hours 0-6)

hour_of_day = weather['hour']

# Split time period dataframes
# Every comparison is parenthesised: & and | bind tighter than >= and <, so an
# unparenthesised `hour >= 19 & (...)` evaluates `19 & (...)` first.
weather_day = weather[(hour_of_day >= 7) & (hour_of_day < 19)]
weather_evening = weather[(hour_of_day >= 19) & (hour_of_day < 23)]
# Night wraps past midnight, so it is the union of the late hour and the early hours.
# NOTE: the averages below group by calendar date, so one date's "night" combines
# that date's 0-6 AM hours with its 11 PM hour.
weather_night = weather[(hour_of_day >= 23) | (hour_of_day < 7)]

In [218]:
# Using split time periods to calculate average values for conditions

def circular_mean_deg(bearings):
    """Return the mean compass bearing, in degrees within [0, 360).

    Bearings wrap around at 360, so an arithmetic mean is wrong near north
    (357 and 1 would average to 179). The bearings are averaged as unit
    vectors instead and the resulting angle is converted back to degrees.
    """
    radians = np.deg2rad(bearings)
    mean_angle = np.arctan2(np.sin(radians).mean(), np.cos(radians).mean())
    return np.rad2deg(mean_angle) % 360


def daily_mean(frame, columns):
    """Average each of ``columns`` per calendar date; 'date' becomes a regular column."""
    return frame.groupby('date').agg({column: 'mean' for column in columns}).reset_index()


# Columns averaged for every time-of-day slice
period_columns = ['temp', 'humidity', 'wind_speed', 'cloud_cover', 'visibility', 'uv_index']

# Dataframe for average conditions (all hours of the day)
# NOTE(review): missing values were replaced by 0 earlier in the notebook, so a
# missing wind_dir is treated as a bearing of 0 degrees here - confirm acceptable.
avg_weather = weather.groupby('date').agg({
    **{column: 'mean' for column in period_columns},
    'wind_gust': 'mean',
    'wind_dir': circular_mean_deg,   # bearings need a circular mean, not an arithmetic one
    'precip': 'mean',
    'dew': 'mean',
    'solar_radiation': 'mean',
    'feels_like': 'mean'
}).reset_index()

# Dataframe for average daytime conditions
avg_daytime_weather = daily_mean(weather_day, period_columns)

# Dataframe for average evening conditions
avg_evening_weather = daily_mean(weather_evening, period_columns)

# Dataframe for average night conditions
avg_night_weather = daily_mean(weather_night, period_columns)

# Analyzing Data

### Line Graphs – Average Temperature Over Time

In [219]:
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)

# One line per time-of-day slice: (dataframe, line colour, legend label)
slices = [
    (avg_daytime_weather, 'steelblue', 'Day'),
    (avg_evening_weather, 'darkorange', 'Evening'),
    (avg_night_weather, 'forestgreen', 'Night'),
]
for frame, colour, legend_name in slices:
    ax.plot(frame['date'], frame['temp'], color=colour, label=legend_name, linewidth=1)

# X-axis: one major tick per month, shown as e.g. "Jan 2022"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

# Title, axis labels and legend
ax.set(
    title='Average Temperature by Time of Day (2022)',
    xlabel='Date',
    ylabel='Temperature (°F)',
)
ax.legend()

# Appearance: faint grid, slanted date labels, trimmed margins
ax.grid(True, alpha=0.65)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()

# Saves image to the export directory
graph_export('Average Temperature by Time of Day', fig=fig)

#plt.show()

In [220]:
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)

# Daily mean temperature of the daytime slice
ax.plot(
    avg_daytime_weather['date'],
    avg_daytime_weather['temp'],
    color='steelblue',
    label='Daytime',
)

# Title and axis labels
ax.set(
    title='Average Daytime Temperature (2022)',
    xlabel='Date',
    ylabel='Temperature (°F)',
)

# X-axis: one major tick per month, shown as e.g. "Jan 01"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Appearance: grid, slanted date labels, trimmed margins
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()

# Saves image to the export directory
graph_export('Average Daytime Temperature', fig=fig)

#plt.show()



In [221]:
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)

# Daily mean temperature of the evening slice
ax.plot(
    avg_evening_weather['date'],
    avg_evening_weather['temp'],
    color='darkorange',
)

# Title and axis labels
ax.set(
    title='Average Evening Temperature (2022)',
    xlabel='Date',
    ylabel='Temperature (°F)',
)

# X-axis: one major tick per month, shown as e.g. "Jan 01"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Appearance: grid, slanted date labels, trimmed margins
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()

# Saves image to the export directory
graph_export('Average Evening Temperature', fig=fig)

#plt.show()


In [222]:
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)

# Daily mean temperature of the night slice
ax.plot(
    avg_night_weather['date'],
    avg_night_weather['temp'],
    color='forestgreen',
)

# Title and axis labels
ax.set(
    title='Average Night Temperature (2022)',
    xlabel='Date',
    ylabel='Temperature (°F)',
)

# X-axis: one major tick per month, shown as e.g. "Jan 01"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Appearance: grid, slanted date labels, trimmed margins
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()

# Saves image to the export directory
graph_export('Average Night Temperature', fig=fig)

# plt.show()


In [223]:
fig, ax = plt.subplots(figsize=(10, 5), dpi=300)

# Plot Data
# Each line carries a label so the legend tells the two series apart
ax.plot(avg_weather['date'], avg_weather['temp'], color='red', linewidth=1, label='Temperature')
ax.plot(avg_weather['date'], avg_weather['feels_like'], color='blue', linewidth=1, label='Feels Like')

# Title, axis labels and legend
ax.set_title('Average Temperature vs. Feels Like')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature (°F)')
ax.legend()

# Date formatting: one major tick per month, shown as e.g. "Jan 01"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))

# Plot appearance
ax.grid(True)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Saves image to the export directory
graph_export('Average Temperature vs Feels Like', fig=fig)

#plt.show()

### Scatter Plots

In [230]:
# Two side-by-side scatter panels, each with a least-squares regression line
fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=300)

# (x column, y column, title, x label, y label) for each panel, left to right
panels = [
    ('humidity', 'temp', 'Average Temperature vs. Humidity', 'Humidity (%)', 'Average Temperature (°F)'),
    ('wind_speed', 'temp', 'Average Temperature vs Wind Speed', 'Wind Speed (mph)', 'Average Temperature (°F)'),
]

for ax, (x_col, y_col, title, x_label, y_label) in zip(axes, panels):
    x = avg_weather[x_col]
    y = avg_weather[y_col]

    ax.scatter(x, y, color='blue', alpha=0.5)

    # Degree-1 polyfit returns (slope, intercept) of the regression line
    slope, intercept = np.polyfit(x, y, 1)
    ax.plot(x, slope * x + intercept, color='red')

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Enable the grid explicitly: a bare grid() call toggles it instead,
    # which would switch it off wherever grids are on by default.
    ax.grid(True, alpha=0.65)

fig.tight_layout()

# Saves image to the export directory
graph_export('Avg Temperature vs Humidity and Wind Speed', fig=fig)

#plt.show()

In [225]:
# Two side-by-side scatter panels, each with a least-squares regression line
fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=300)

# (x column, y column, title, x label, y label) for each panel, left to right
panels = [
    ('uv_index', 'temp', 'Average Temperature vs. UV Index', 'UV Index', 'Average Temperature (°F)'),
    ('temp', 'feels_like', 'Average Temperature vs. Feels Like', 'Temperature', 'Feels Like'),
]

for ax, (x_col, y_col, title, x_label, y_label) in zip(axes, panels):
    x = avg_weather[x_col]
    y = avg_weather[y_col]

    ax.scatter(x, y, color='blue', alpha=0.5)

    # Degree-1 polyfit returns (slope, intercept) of the regression line
    slope, intercept = np.polyfit(x, y, 1)
    ax.plot(x, slope * x + intercept, color='red')

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Enable the grid explicitly: a bare grid() call toggles it instead,
    # which would switch it off wherever grids are on by default.
    ax.grid(True, alpha=0.65)

fig.tight_layout()

# Saves image to the export directory
graph_export('Avg Temp v UV Index - Avg Temp v Feels Like', fig=fig)

# plt.show()

In [226]:

# Two side-by-side scatter panels, each with a least-squares regression line
fig, axes = plt.subplots(1, 2, figsize=(14, 6), dpi=300)

# (x column, y column, title, x label, y label) for each panel, left to right
panels = [
    ('uv_index', 'cloud_cover', 'UV Index vs. Cloud Coverage', 'UV Index', 'Cloud Coverage'),
    ('cloud_cover', 'solar_radiation', 'Average Cloud Coverage vs. Solar Radiation',
     'Cloud Coverage (%)', 'Solar Radiation (W/m^2)'),
]

for ax, (x_col, y_col, title, x_label, y_label) in zip(axes, panels):
    x = avg_weather[x_col]
    y = avg_weather[y_col]

    ax.scatter(x, y, color='blue', alpha=0.5)

    # Degree-1 polyfit returns (slope, intercept) of the regression line
    slope, intercept = np.polyfit(x, y, 1)
    ax.plot(x, slope * x + intercept, color='red')

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Enable the grid explicitly: a bare grid() call toggles it instead,
    # which would switch it off wherever grids are on by default.
    ax.grid(True, alpha=0.65)

fig.tight_layout()

# Saves image to the export directory
graph_export('UV Index v Cloud Cover - Cloud Cover v Solar Rad', fig=fig)

# plt.show()

### Polar Scatter Plot

In [227]:
# Polar axes take the angle in radians, so the wind direction column is converted from degrees
wind_dir_rad = np.deg2rad(avg_weather['wind_dir'])

# Square figure holding a single polar axes
fig, ax = plt.subplots(figsize=(8, 8), dpi=300, subplot_kw={'polar': True})

# Angle = wind direction, radius = wind speed; the speed also drives the colour
sc = ax.scatter(
    wind_dir_rad,
    avg_weather['wind_speed'],
    c=avg_weather['wind_speed'],
    cmap='plasma',
    alpha=0.75,
)
cbar = fig.colorbar(sc, ax=ax, pad=0.1)
cbar.set_label('Wind Speed (MPH)')

# Compass layout: zero angle at the top ('N'), angles increasing clockwise
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
ax.set_title('Wind Direction vs Wind Speed', va='bottom')

# Saves image to the export directory
graph_export('Wind Dir v Wind Speed', fig=fig)

# plt.show()