# DSE CODING PROJECT

Importing Libraries

In [13]:
import pandas as pd
import geopandas as gpd 
import streamlit as st
import matplotlib.pyplot as plt
import import_ipynb
import data_loading
from data_loading import load_data  # This is the function defined in the first file
import numpy as np

Loading data

In [14]:
data = load_data(r"C:\Users\greta\OneDrive\Desktop\GlobalLandTemperaturesByMajorCity.csv")

Calculating Temperature Range for Each City

In [15]:
temp_ranges = data.groupby('City')['AverageTemperature'].agg(lambda x: x.max() - x.min()) #calculates the temperature range for each city (the difference between the highest and lowest temperatures)
high_variance_cities = temp_ranges[temp_ranges > 15].index  #  filters out cities where the temperature range is above 15°C

Setting the App Title and Instructions

In [16]:
st.title("Global Temperature Change Visualization Over Time")
st.write("Explore the change in temperatures over time and identify cities with large temperature ranges.")



Adding a Year Slider

In [17]:
year = st.slider("Select Year", int(data['Year'].min()), int(data['Year'].max()), step=1)



Filtering Data for the Selected Year

In [18]:
year_data = data[data['Year'] == year]

Converting Data to a GeoDataFrame

In [19]:
gdf = gpd.GeoDataFrame(year_data, geometry=gpd.points_from_xy(year_data.Longitude, year_data.Latitude))
# creating points on a map for each city using longitude and latitude data, then converting year_data to a GeoDataFrame, allowing to plot it on a map.


Loading a World Map Shapefile

In [20]:
file_p = r"C:\Users\greta\OneDrive\Desktop\nat.earth\ne_110m_admin_0_countries.shp"
world = gpd.read_file(file_p)
# a shapefile is a file format for geographic data

Plotting the Map with Enhancements

In [None]:
fig, ax = plt.subplots(figsize=(12, 8)) #creating a blank figure (fig) with a specified size and an axis (ax) to draw on.
world.plot(ax=ax, color='lightgrey', edgecolor='darkgrey')  # World map with borders

<Axes: >

Plotting all cities in blue with different sizes for better visibility

In [None]:
gdf.plot(ax=ax, color='blue', markersize=20, alpha=0.5, edgecolor='black', label="Other Cities") #markersize=20 controls the size of the markers, while alpha=0.5 makes them slightly transparent.

<Axes: >

Highlighting high variance cities in red with larger markers

In [23]:
high_variance_data = gdf[gdf['City'].isin(high_variance_cities)]
high_variance_data.plot(ax=ax, color='red', markersize=50, alpha=0.7, edgecolor='black', label="High Temp Range Cities")

<Axes: >

Adding titles

In [None]:
plt.title(f"Temperature Distribution in Major Cities - {year}", fontsize=18, fontweight='bold', color='darkblue')
plt.xlabel("Longitude", fontsize=14)
plt.ylabel("Latitude", fontsize=14)


Text(105.59722222222221, 0.5, 'Latitude')

Customizing ticks

In [25]:
ax.tick_params(axis='both', which='major', labelsize=12)
ax.set_xticks(np.arange(-180, 181, 30))  # Setting x-ticks every 30 degrees
ax.set_yticks(np.arange(-90, 91, 30))    # Setting y-ticks every 30 degrees
ax.grid(color='gray', linestyle='--', linewidth=0.5, alpha=0.5)  # Adding a grid

Adding a legend and displaying the map

In [26]:
plt.legend()
st.pyplot(fig)



DeltaGenerator()

Selecting a city to display time-series

In [None]:
selected_city = st.selectbox("Select a city to view historical temperature data:", data['City'].unique())

if selected_city:
    city_data = data[data['City'] == selected_city] #If the city is selected, the code filters data to include only rows for the chosen city.

    if not city_data.empty:
        # Ensuring 'Date' is a datetime object
        city_data['Date'] = pd.to_datetime(city_data['dt'], errors='coerce')
        
        # Extracting year from the date and creating a new column
        city_data['Year'] = city_data['Date'].dt.year
        
        # Grouping by year and calculating the average temperature for each year
        yearly_data = city_data.groupby('Year', as_index=False)['AverageTemperature'].mean()

        fig, ax = plt.subplots(figsize=(10, 6))

        # Plot with enhancements
        ax.plot(yearly_data['Year'], yearly_data['AverageTemperature'], 
                color='darkred', marker='o', linestyle='-', linewidth=1, markersize=3)
        ax.set_title(f"Average Temperature Change Over Time in {selected_city}", 
                     fontsize=16, fontweight='bold', color='darkblue')
        ax.set_xlabel("Year", fontsize=14, color='gray')
        ax.set_ylabel("Average Temperature (°C)", fontsize=14, color='gray')

        # Add grid and customize tick parameters
        ax.grid(True, linestyle='--', color='gray', alpha=0.7)
        ax.tick_params(axis='both', which='major', labelsize=12)

        # Show the plot in Streamlit
        st.pyplot(fig)
    else:
        st.write(f"No data available for {selected_city}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['dt'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Year'] = city_data['Date'].dt.year
