# DSE CODING PROJECT

Importing Libraries

In [55]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from geopy.distance import geodesic

import import_ipynb  # keep before data_loading: it is what lets the notebook module be imported
import data_loading
from data_loading import load_data  # This is the function defined in the first file

Loading data

In [56]:
# Location of the temperature CSV. The original machine-specific path is kept as the
# default, so behaviour is unchanged when nothing is configured; set the
# TEMPERATURE_CSV environment variable to run on another machine without editing code.
TEMPERATURE_CSV = os.environ.get(
    "TEMPERATURE_CSV",
    r"C:\Users\greta\OneDrive\Desktop\GlobalLandTemperaturesByMajorCity.csv",
)
data = load_data(TEMPERATURE_CSV)

Calculating Temperature Range for Each City

In [57]:
# Temperature range per city: highest minus lowest AverageTemperature on record.
city_temps = data.groupby('City')['AverageTemperature']
temp_ranges = city_temps.max() - city_temps.min()
# Names of the cities whose range exceeds 15 °C (these are the ones highlighted on the map).
high_variance_cities = temp_ranges.loc[temp_ranges.gt(15)].index

Setting the App Title and Instructions

In [58]:
# Page header: the app title, followed by a one-line description of what the app shows.
st.title("Global Temperature Change Visualization Over Time")
st.write("Explore the change in temperatures over time and identify cities with large temperature ranges.")



Adding a Year Slider

In [59]:
# Year picker covering the earliest to the latest year present in the data, in one-year steps.
year = st.slider(
    label="Select Year",
    min_value=int(data['Year'].min()),
    max_value=int(data['Year'].max()),
    step=1,
)



Filtering Data for the Selected Year

In [60]:
# Keep only the rows whose Year matches the value chosen on the slider.
year_data = data.loc[data['Year'].eq(year)]

Loading baseline city data (unique cities with coordinates) for consistent plotting

In [61]:
# One row per distinct (City, Latitude, Longitude) combination, independent of the selected year.
baseline_cities = data.loc[:, ['City', 'Latitude', 'Longitude']].drop_duplicates(keep='first')

Converting Data to a GeoDataFrame

In [62]:
# Turn both tables into GeoDataFrames by attaching a point geometry to every row
# (x = longitude, y = latitude) so they can be drawn on the map.
gdf_all_cities = gpd.GeoDataFrame(
    baseline_cities,
    geometry=gpd.points_from_xy(x=baseline_cities['Longitude'], y=baseline_cities['Latitude']),
)
gdf_year_data = gpd.GeoDataFrame(
    year_data,
    geometry=gpd.points_from_xy(x=year_data['Longitude'], y=year_data['Latitude']),
)


Loading a World Map Shapefile

In [63]:
# Location of the country-boundaries shapefile (a shapefile is a file format for
# geographic data). The original machine-specific path is kept as the default, so
# behaviour is unchanged when nothing is configured; set the WORLD_SHAPEFILE
# environment variable to run on another machine without editing code.
WORLD_SHAPEFILE = os.environ.get(
    "WORLD_SHAPEFILE",
    r"C:\Users\greta\OneDrive\Desktop\nat.earth\ne_110m_admin_0_countries.shp",
)
world = gpd.read_file(WORLD_SHAPEFILE)

Plotting the Map with Enhancements

In [64]:
# Create a blank 12x8 figure (fig) with a single axis (ax) to draw the map on.
fig, ax = plt.subplots(figsize=(12, 8))
# Base layer: the world map, light-grey fill with dark-grey borders.
world.plot(ax=ax, color='lightgrey', edgecolor='darkgrey')

<Axes: >

Plotting all of the cities in a light color 

In [65]:
# Faint (alpha=0.3) light-blue markers for every city in the baseline table, labelled for the legend.
gdf_all_cities.plot(ax=ax, color='lightblue', markersize=20, alpha=0.3, edgecolor='black', label="All Cities")

<Axes: >

Plotting cities with temperature data for the selected year in a distinct color (blue)

In [66]:
# Blue markers for the rows of the selected year. markersize=20 controls the marker size,
# alpha=0.5 makes them slightly transparent.
# Fixed: this used to call `gdf.plot`, but no `gdf` is defined anywhere in the notebook
# (NameError on a fresh kernel); the per-year GeoDataFrame is `gdf_year_data`.
gdf_year_data.plot(ax=ax, color='blue', markersize=20, alpha=0.5, edgecolor='black', label="Cities with Temperature Data")

<Axes: >

Highlighting high variance cities in red with larger markers

In [67]:
# Points of the selected year whose city is in the high-temperature-range list,
# drawn on top in red with larger markers so they stand out.
high_variance_data = gdf_year_data.loc[gdf_year_data['City'].isin(high_variance_cities)]
high_variance_data.plot(
    ax=ax,
    color='red',
    markersize=50,
    alpha=0.7,
    edgecolor='black',
    label="High Temperature Range Cities",
)

<Axes: >

Adding titles

In [68]:
# Title and axis labels are set on the map's own axes rather than through pyplot.
# plt.title/xlabel/ylabel act on pyplot's "current" axes, which is no longer the map once
# the notebook has closed the figure at the end of an earlier cell; `ax` always is the map.
ax.set_title(f"Temperature Distribution in Major Cities - {year}", fontsize=18, fontweight='bold', color='darkblue')
ax.set_xlabel("Longitude", fontsize=14)
ax.set_ylabel("Latitude", fontsize=14)


Text(105.59722222222221, 0.5, 'Latitude')

Adding a legend and displaying the map

In [69]:
# Build the legend on the map's own axes (plt.legend() targets pyplot's current axes,
# which may not be the map), then hand the finished figure to Streamlit.
ax.legend()
st.pyplot(fig)
# Figures created through pyplot stay registered until closed; release this one so
# repeated Streamlit reruns do not pile up open figures.
plt.close(fig)



DeltaGenerator()

Selecting a city to display time-series

In [70]:
# Drop-down listing every distinct city in the dataset.
selected_city = st.selectbox("Select a city to view historical temperature data:", data['City'].unique())

if selected_city:
    # Restrict the data to the rows of the chosen city.
    city_data = data[data['City'] == selected_city]

    if not city_data.empty:
        # One point per year: the mean of that year's AverageTemperature rows.
        yearly_data = city_data.groupby('Year', as_index=False)['AverageTemperature'].mean()

        fig, ax = plt.subplots(figsize=(10, 6))

        # Thin dark-red line with small circular markers, one per year.
        ax.plot(yearly_data['Year'], yearly_data['AverageTemperature'],
                color='darkred', marker='o', linestyle='-', linewidth=1, markersize=3)
        ax.set_title(f"Average Temperature Change Over Time in {selected_city}",
                     fontsize=16, fontweight='bold', color='darkblue')
        ax.set_xlabel("Year", fontsize=14, color='gray')
        ax.set_ylabel("Average Temperature (°C)", fontsize=14, color='gray')

        # Dashed grey grid to make values easier to read off.
        ax.grid(True, linestyle='--', color='gray', alpha=0.7)

        # Show the plot in Streamlit, then close the figure: pyplot keeps every figure
        # it creates until it is closed, and this block runs again on every rerun.
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.write(f"No data available for {selected_city}")




SECOND PART OF THE PROJECT

Starting and destination coordinates 

In [71]:
# Endpoints of the trip. The coordinate tuples are (latitude, longitude), the same order
# in which coordinates are passed to geodesic() in this notebook.
start_city = "Beijing"
end_city = "Los Angeles"
start_coords = (39.38, 116.53)  # Beijing
end_coords = (34.56, -118.70)   # Los Angeles

Creating a filtered copy of the data for the selected period

In [72]:
# Candidate cities for the route, taken from the selected year's data. Visited cities are
# removed from route_data while the route is built, so work on a copy and leave
# year_data itself unchanged.
route_data = year_data.copy()

Function to get the three closest cities to a given location

In [73]:
def get_nearest_cities(data, current_coords, num_cities=3):
    """Return the ``num_cities`` cities closest to ``current_coords``, warmest first.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs 'Latitude', 'Longitude' and 'AverageTemperature' columns. When a 'City'
        column is present, repeated rows for the same city are collapsed into a single
        candidate (first row kept, AverageTemperature replaced by the city's mean), so
        one city cannot occupy several of the ``num_cities`` slots.
    current_coords : tuple
        (latitude, longitude) of the current position.
    num_cities : int, default 3
        How many of the closest cities to consider.

    Returns
    -------
    pandas.DataFrame
        At most ``num_cities`` rows with an added 'Distance' column (kilometres, from
        ``geodesic``), sorted by 'AverageTemperature' in descending order. ``data``
        itself is never modified.
    """
    if 'City' in data.columns:
        # Several rows can exist per city; without this step the "closest cities" could
        # all be rows of the same city.
        mean_temperature = data.groupby('City')['AverageTemperature'].transform('mean')
        candidates = data.assign(AverageTemperature=mean_temperature).drop_duplicates(subset='City')
    else:
        candidates = data

    # Distance from the current position to each candidate (zip over the two columns
    # avoids the per-row overhead of iterrows).
    distances = [
        geodesic(current_coords, (lat, lon)).km
        for lat, lon in zip(candidates['Latitude'], candidates['Longitude'])
    ]

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    nearest = candidates.assign(Distance=distances).nsmallest(num_cities, 'Distance')
    return nearest.sort_values(by='AverageTemperature', ascending=False)


Iteratively building the route

In [74]:
# route stores the cities added to the route, as dictionaries with city name, latitude, and longitude.
route = []
# The search begins at the starting location.
current_coords = start_coords


Loop to build the route

In [75]:


# Greedy walk: keep hopping until the current position is within 100 km of the destination.
while geodesic(current_coords, end_coords).km > 100:
    # Closest candidates to the current position, ordered warmest first.
    nearby_cities = get_nearest_cities(route_data, current_coords)
    if nearby_cities.empty:
        st.write("No more cities within reach.")
        break

    # The first row is the warmest of the closest candidates: that is the next stop.
    next_city = nearby_cities.iloc[0]
    route.append({field: next_city[field] for field in ('City', 'Latitude', 'Longitude')})

    # Move to the new stop and drop it from the candidates so it is not visited again.
    current_coords = (next_city['Latitude'], next_city['Longitude'])
    route_data = route_data.loc[route_data['City'].ne(next_city['City'])]

st.write("Route:", " → ".join(stop['City'] for stop in route))




Visualizing the route

In [76]:
if route:
    # Fresh figure for the route, drawn over the world shapefile loaded earlier.
    fig, ax = plt.subplots(figsize=(12, 8))
    world.plot(ax=ax, color='lightgrey', edgecolor='darkgrey')

    # Coordinates of the whole trip: start point, every stop on the route, end point.
    lats = [start_coords[0]] + [city['Latitude'] for city in route] + [end_coords[0]]
    longs = [start_coords[1]] + [city['Longitude'] for city in route] + [end_coords[1]]

    # Red line connecting consecutive points of the trip.
    ax.plot(longs, lats, color='red', linewidth=2, label="Route Line")

    # Blue dot ('bo') on each point of the trip.
    ax.plot(longs, lats, 'bo', markersize=5)

    # Title and legend are set on this figure's own axes instead of through pyplot's
    # current-axes state, matching how the time-series chart is built.
    ax.set_title("Travel Route Visualization", fontsize=18, fontweight='bold')
    ax.legend()

    # Display in Streamlit, then close the figure so reruns do not accumulate open figures.
    st.pyplot(fig)
    plt.close(fig)

