## Daily Challenge: NumPy, Pandas, and Matplotlib Integration

### 1. Data Preparation:

Hint 1: Use np.random.uniform(low, high, size) to generate the temperature data. \
Hint 2: Create a DataFrame using pd.DataFrame(data, index, columns) with appropriate index and columns. 

Use NumPy to generate a synthetic dataset representing average monthly temperatures (in degrees Celsius) for 12 months across 10 different cities. The temperatures should range from -5 to 35 degrees. 

Convert this NumPy array into a Pandas DataFrame, adding city names as index and months as columns.

In [1]:
# !pip install geopy

In [2]:
import numpy as np
import pandas as pd

#  Capitals of the world
capitals = ['Tel Aviv', 'London', 'Paris', 'Berlin', 'Tokyo', 'Beijing', 'Moscow', 'Washington', 'Ottawa']

months = pd.date_range(start='2024-01-01', end='2024-12-01', freq='MS').strftime('%B')

In [3]:
from geopy.geocoders import Nominatim

# I used geopy to get latitude for futher temperature generation
gl = Nominatim(user_agent="di_challenge")
capitals_coord= [gl.geocode(city, language='en', timeout=100) for city in capitals]

capitals_coord

[Location(Tel-Aviv, Tel Aviv Subdistrict, Tel-Aviv District, Israel, (32.0852997, 34.7818064, 0.0)),
 Location(London, Greater London, England, United Kingdom, (51.4893335, -0.14405508452768728, 0.0)),
 Location(Paris, Ile-de-France, Metropolitan France, France, (48.8588897, 2.3200410217200766, 0.0)),
 Location(Berlin, Germany, (52.510885, 13.3989367, 0.0)),
 Location(Tokyo, Japan, (35.6821936, 139.762221, 0.0)),
 Location(Beijing, China, (40.190632, 116.412144, 0.0)),
 Location(Moscow, Central Federal District, Russia, (55.7505412, 37.6174782, 0.0)),
 Location(Washington, United States, (47.2868352, -120.212613, 0.0)),
 Location(Ottawa, Eastern Ontario, Ontario, Canada, (45.4208777, -75.6901106, 0.0))]

In [4]:
import numpy as np
import random

# I thought that it would be interesting to try to generate temperature close to the actual
# I decided to use latitude and month to account for the effect of seasonality and closeness to the equator
# But it turns out that it is not that easy to find a good formula
# Here is the best that I could come up with

def generate_temperature(month, latitude, base_temp=0, mon_adj=8, lat_adj=11, temp_range=2):
    # Assuming months close to Jan are the coldest and to July are the warmest
    # We can use cosine function so that factor would approach -1 the closer month is to Jan and 1 for July
    # We achieve this by scaling month so that period is 12 and then adding pi to shift the cosine function to the right
    # I've researched how temp changes monthly througt the year and found that this approach gives good results
    mon_scale = np.cos(((month - 1) * 2 * np.pi / 12) + np.pi)
    
    # Here we adjust for the latitude.
    # So it will be 1 the closer it to the equator and -1 the closer it to the poles
    lat_scale = np.cos(np.pi * abs(latitude) / 90)
    
    # So we use base temperature and adjust it for the month and latitude
    adj_temp = base_temp + mon_scale * mon_adj  + lat_scale * lat_adj
    
    return np.random.uniform(adj_temp - temp_range, adj_temp + temp_range)

In [5]:
import plotly.graph_objects as go
import random
import plotly.colors as colors

# Here I tried to find best values for monthly and latitude adjustments
# But if i set up weather limit close to actual weather, it is impossible 
# to find corresponding values for mon_adj and lat_adj

months_num = list(range(1, 13))
fig = go.Figure()
plots = 0

for _ in range(1000):
    max_temp = 30
    mon_adj = random.randint(1, max_temp)
    lat_adj = random.randint(1, max_temp)
    tel_aviv_temps = [generate_temperature(i, 32, mon_adj, lat_adj) for i in months_num]
    moscow_temps = [generate_temperature(i, 52, mon_adj, lat_adj) for i in months_num]
    
    tel_aviv_max = np.max(tel_aviv_temps)
    tel_aviv_min = np.min(tel_aviv_temps)
    moscow_max = np.max(moscow_temps)
    moscow_min = np.min(moscow_temps)
    
    if tel_aviv_min > -5 and moscow_max > 10 and tel_aviv_max < 35 and moscow_min < 0:
        color = colors.sample_colorscale(colors.PLOTLY_SCALES['Rainbow'], random.random())[0]

        fig.add_trace(go.Scatter(x=months_num, y=tel_aviv_temps, mode='lines', 
                                name=f'Tel Aviv (32°) {mon_adj} {lat_adj} {tel_aviv_max:.2f} {tel_aviv_min:.2f}', line=dict(color=color, width=2)))
        fig.add_trace(go.Scatter(x=months_num, y=moscow_temps, mode='lines', 
                             name=f'Moscow (52°) {mon_adj} {lat_adj} {moscow_max:.2f} {moscow_min:.2f}', line=dict(color=color, dash='dash', width=2)))
        plots += 1
        if plots > 10:
            break
    
fig.update_layout(
    legend=dict(xanchor='left', yanchor='top'),
    width=1200,
    height=600,
)

fig.show()

# Good values
# 8 20
# 9 22
# 10 17
# 8 11


In [6]:
help(capitals_coord[0])

Help on Location in module geopy.location object:

class Location(builtins.object)
 |  Location(address, point, raw)
 |
 |  Contains a parsed geocoder response. Can be iterated over as
 |  ``(location<String>, (latitude<float>, longitude<Float))``.
 |  Or one can access the properties ``address``, ``latitude``,
 |  ``longitude``, or ``raw``. The last
 |  is a dictionary of the geocoder's response for this item.
 |
 |  Methods defined here:
 |
 |  __eq__(self, other)
 |      Return self==value.
 |
 |  __getitem__(self, index)
 |      Backwards compatibility with geopy<0.98 tuples.
 |
 |  __getstate__(self)
 |      Helper for pickle.
 |
 |  __init__(self, address, point, raw)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |
 |  __iter__(self)
 |
 |  __len__(self)
 |
 |  __ne__(self, other)
 |      Return self!=value.
 |
 |  __repr__(self)
 |      Return repr(self).
 |
 |  __setstate__(self, state)
 |
 |  __str__(self)
 |      Return str(self).
 |
 |  ------------

In [7]:
capitals_coord[0].raw


{'place_id': 374285331,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'relation',
 'osm_id': 1382494,
 'lat': '32.0852997',
 'lon': '34.7818064',
 'class': 'boundary',
 'type': 'administrative',
 'place_rank': 16,
 'importance': 0.6499881947160822,
 'addresstype': 'city',
 'name': 'Tel-Aviv',
 'display_name': 'Tel-Aviv, Tel Aviv Subdistrict, Tel-Aviv District, Israel',
 'boundingbox': ['32.0293437', '32.1469766', '34.7391310', '34.8522617']}

In [8]:
months_df = pd.DataFrame({'month': months, 'mon_num': range(1, 13)})
months_df

Unnamed: 0,month,mon_num
0,January,1
1,February,2
2,March,3
3,April,4
4,May,5
5,June,6
6,July,7
7,August,8
8,September,9
9,October,10


In [9]:
city_df = pd.DataFrame({
    'city': [city.raw['name'] for city in capitals_coord],
    'latitude': [city.latitude for city in capitals_coord],
    'longitude': [city.longitude for city in capitals_coord],
    'country': [city.raw['display_name'].split(',')[-1] for city in capitals_coord]
})

city_df



Unnamed: 0,city,latitude,longitude,country
0,Tel-Aviv,32.0853,34.781806,Israel
1,London,51.489334,-0.144055,United Kingdom
2,Paris,48.85889,2.320041,France
3,Berlin,52.510885,13.398937,Germany
4,Tokyo,35.682194,139.762221,Japan
5,Beijing,40.190632,116.412144,China
6,Moscow,55.750541,37.617478,Russia
7,Washington,47.286835,-120.212613,United States
8,Ottawa,45.420878,-75.690111,Canada


In [10]:
city_temp = pd.merge(months_df, city_df, how='cross')

city_temp['temperature'] = city_temp.apply(lambda x: generate_temperature(x['mon_num'], x['latitude']), axis=1)

city_temp

Unnamed: 0,month,mon_num,city,latitude,longitude,country,temperature
0,January,1,Tel-Aviv,32.085300,34.781806,Israel,-1.614218
1,January,1,London,51.489334,-0.144055,United Kingdom,-10.392227
2,January,1,Paris,48.858890,2.320041,France,-8.806396
3,January,1,Berlin,52.510885,13.398937,Germany,-10.437891
4,January,1,Tokyo,35.682194,139.762221,Japan,-4.023190
...,...,...,...,...,...,...,...
103,December,12,Tokyo,35.682194,139.762221,Japan,-4.075463
104,December,12,Beijing,40.190632,116.412144,China,-6.100851
105,December,12,Moscow,55.750541,37.617478,Russia,-11.637483
106,December,12,Washington,47.286835,-120.212613,United States,-6.627022


In [11]:
city_mon_temp = pd.pivot_table(city_temp, values='temperature', index=['city', 'country', 'latitude', 'longitude'], columns='month', aggfunc='first')

city_mon_temp = city_mon_temp.reset_index()
city_mon_temp = city_mon_temp.set_index('city')
city_mon_temp.columns = city_mon_temp.columns.values

city_mon_temp

Unnamed: 0_level_0,country,latitude,longitude,April,August,December,February,January,July,June,March,May,November,October,September
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Beijing,China,40.190632,116.412144,1.525956,10.274253,-6.100851,-6.701318,-7.966636,8.582163,9.650098,-3.434638,4.338819,-3.667594,2.651695,6.581024
Berlin,Germany,52.510885,13.398937,-1.160277,2.68895,-8.360735,-10.624344,-10.437891,7.127933,3.915893,-6.503271,2.314219,-6.901084,-4.775165,1.798344
London,United Kingdom,51.489334,-0.144055,-0.910395,3.548651,-9.903182,-9.526874,-10.392227,3.973568,3.746567,-5.171293,1.660261,-7.156269,-0.609587,2.579724
Moscow,Russia,55.750541,37.617478,-2.509506,4.771368,-11.637483,-12.477948,-10.502272,5.832503,1.508958,-8.117082,1.570913,-8.581225,-5.783271,-1.692571
Ottawa,Canada,45.420878,-75.690111,0.556766,6.681072,-7.910544,-9.045722,-6.904456,8.32683,7.945948,-3.008475,4.986711,-2.417545,-1.277278,3.739482
Paris,France,48.85889,2.320041,0.029848,4.451577,-6.460807,-9.238933,-8.806396,5.846569,4.265259,-7.182633,3.206103,-5.676701,-2.070002,3.723502
Tel-Aviv,Israel,32.0853,34.781806,6.120886,11.001116,-3.599407,-1.965023,-1.614218,11.365447,10.456939,-1.195732,10.334666,2.576738,6.264899,8.172682
Tokyo,Japan,35.682194,139.762221,4.230169,10.626718,-4.075463,-4.703622,-4.02319,12.702997,11.069263,-0.150159,7.260753,-0.555226,4.99465,8.42865
Washington,United States,47.286835,-120.212613,-2.027047,5.909458,-6.627022,-8.332875,-9.82337,6.096781,7.946186,-6.393337,1.820516,-3.775081,-2.773141,3.35656


### 2. Data Analysis:

Hint 1: Calculate the annual average temperature using DataFrame.mean(axis). \
Hint 2: Find the city with the highest and lowest average temperature using idxmax() and idxmin() methods.

Calculate the annual average temperature for each city.

Identify the city with the highest and lowest average temperature for the year.

In [12]:
cmt = city_mon_temp
cmt['avg_temp'] = cmt[months].mean(axis=1)
cmt

Unnamed: 0_level_0,country,latitude,longitude,April,August,December,February,January,July,June,March,May,November,October,September,avg_temp
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Beijing,China,40.190632,116.412144,1.525956,10.274253,-6.100851,-6.701318,-7.966636,8.582163,9.650098,-3.434638,4.338819,-3.667594,2.651695,6.581024,1.311081
Berlin,Germany,52.510885,13.398937,-1.160277,2.68895,-8.360735,-10.624344,-10.437891,7.127933,3.915893,-6.503271,2.314219,-6.901084,-4.775165,1.798344,-2.576452
London,United Kingdom,51.489334,-0.144055,-0.910395,3.548651,-9.903182,-9.526874,-10.392227,3.973568,3.746567,-5.171293,1.660261,-7.156269,-0.609587,2.579724,-2.346755
Moscow,Russia,55.750541,37.617478,-2.509506,4.771368,-11.637483,-12.477948,-10.502272,5.832503,1.508958,-8.117082,1.570913,-8.581225,-5.783271,-1.692571,-3.968135
Ottawa,Canada,45.420878,-75.690111,0.556766,6.681072,-7.910544,-9.045722,-6.904456,8.32683,7.945948,-3.008475,4.986711,-2.417545,-1.277278,3.739482,0.139399
Paris,France,48.85889,2.320041,0.029848,4.451577,-6.460807,-9.238933,-8.806396,5.846569,4.265259,-7.182633,3.206103,-5.676701,-2.070002,3.723502,-1.492718
Tel-Aviv,Israel,32.0853,34.781806,6.120886,11.001116,-3.599407,-1.965023,-1.614218,11.365447,10.456939,-1.195732,10.334666,2.576738,6.264899,8.172682,4.826583
Tokyo,Japan,35.682194,139.762221,4.230169,10.626718,-4.075463,-4.703622,-4.02319,12.702997,11.069263,-0.150159,7.260753,-0.555226,4.99465,8.42865,3.817128
Washington,United States,47.286835,-120.212613,-2.027047,5.909458,-6.627022,-8.332875,-9.82337,6.096781,7.946186,-6.393337,1.820516,-3.775081,-2.773141,3.35656,-1.218531


In [13]:
print(f'City with the highest average temperature of {cmt.loc[cmt.avg_temp.idxmax(), "avg_temp"]:.2f} '
      f'is {cmt.avg_temp.idxmax()}')
print(f'City with the lowest average temperature of {cmt.loc[cmt.avg_temp.idxmin(), "avg_temp"]:.2f} '
      f'is {cmt.avg_temp.idxmin()}')

City with the highest average temperature of 4.83 is Tel-Aviv
City with the lowest average temperature of -3.97 is Moscow


### 3. Data Visualization

In [14]:
import plotly.graph_objects as go
from ipywidgets import widgets
from IPython.display import display
import numpy as np


fig = go.FigureWidget(go.Figure())

city_val = widgets.Dropdown(
    description='City:',
    value='All',
    options=np.append('All', city_temp.city.unique())
)

def update_plot(change):
    city_name = city_val.value
    if city_name == 'All':
        temp_trends = city_temp
    else:
        temp_trends = city_temp[city_temp['city'] == city_name]

    fig.data = []

    for city in temp_trends.city.unique():
        temp_trends_city = temp_trends[temp_trends.city==city]
        fig.add_trace(go.Scatter(x=temp_trends_city['month'], y=temp_trends_city['temperature'], name=city, mode='lines', showlegend=True))
    
    # Horizontal line for average temperature
    avg_temp = temp_trends['temperature'].mean()
    fig.add_trace(go.Scatter(x=['January', 'December'], 
                             y=[avg_temp, avg_temp],
                             name='Average', 
                             mode='lines', 
                             line=dict(color='black', dash='dash')))
    fig.update_layout(
        title=f'Temperature Trends in {city_name if city_name != "All" else "All Cities"}',
        xaxis_title='Month',
        yaxis_title='Temperature',
        width=800, height=500
    )


city_val.observe(update_plot, names='value')

display(widgets.VBox([city_val, fig]))

update_plot(None)

VBox(children=(Dropdown(description='City:', options=('All', 'Tel-Aviv', 'London', 'Paris', 'Berlin', 'Tokyo',…