In [None]:
! pip install mysql.connector
! pip install google.cloud
! pip install google-cloud-storage
# fo machine
! pip install Pillow

In [None]:
import json
import re
from datetime import datetime
from io import BytesIO
from typing import Optional

import mysql.connector
import requests
from bs4 import BeautifulSoup
from google.cloud import storage
from google.oauth2 import service_account
from PIL import Image

In [None]:
# Print this machine's public IP address as reported by ipinfo.io.
! curl ipinfo.io/ip

## Toolbox

In [None]:
def create_mysql_connection(user='root',
    password=r'replace',
    host='replace',
    database='city_weather'):
    """Open a MySQL connection with the given credentials.

    Args:
        user: MySQL account name.
        password: password for ``user``.
        host: address of the MySQL server.
        database: schema selected after connecting.

    Returns:
        The connection object from ``mysql.connector.connect``, or ``None``
        when the driver raises ``mysql.connector.Error`` (the error is printed).
    """
    try:
        return mysql.connector.connect(
            user=user,
            password=password,
            host=host,
            database=database,
        )
    except mysql.connector.Error as db_error:
        # Best effort: report the problem and let the caller deal with None.
        print(f"Error: {db_error}")
        return None

def close_mysql_connection(connector, cursor):
    """Close a cursor and then its connection.

    The connection is closed even if closing the cursor raises, so a failing
    cursor cannot leak the connection. ``None`` is accepted for either argument
    and is skipped.

    Args:
        connector: connection object exposing ``close()``, or ``None``.
        cursor: cursor object exposing ``close()``, or ``None``.
    """
    try:
        if cursor is not None:
            cursor.close()
    finally:
        if connector is not None:
            connector.close()

## Fetching cities data
No images and no weather here: this section only fetches the cities and stores them in the DB. It is run manually, not by cron.

### Fetching

In [None]:
# Text that tells handle_cities_page to stop paging once it appears in a page.
# Despite the name it is matched as a plain substring (`in`), not as a regular expression.
TERMINATING_REGEX = 'No Data Available'

def handle_city_soup(city_tr_soup) -> dict:
  """Convert one city table row into a record.

  Args:
    city_tr_soup: row element exposing ``find_all('td')``; the cells at
      index 0, 1, 3 and 4 are read as id, name, latitude and longitude.

  Returns:
    dict with the keys ``'id'`` (int), ``'name'`` (str with surrounding
    whitespace removed), ``'lat'`` (float) and ``'lon'`` (float).

  Raises:
    IndexError: if the row has fewer than five cells.
    ValueError: if the id, latitude or longitude cell is not numeric.
  """
  cells = city_tr_soup.find_all('td')
  return {
      'id': int(cells[0].text),
      # Table cells may carry padding/newlines; int()/float() ignore it, a name does not.
      'name': cells[1].text.strip(),
      'lat': float(cells[3].text),
      'lon': float(cells[4].text),
  }

def handle_cities_page():
  """Scrape every page of the geokeo.com city list (``/city/it/`` pages).

  Pages are requested one after another, starting at 1, until a page contains
  the TERMINATING_REGEX text or has no data rows after the header row.

  Returns:
    list[dict]: one record per data row, as produced by handle_city_soup.

  Raises:
    RuntimeError: if a page answers with a status code other than 200.
  """
  city_base_url = 'https://geokeo.com/database/city/it/{}'
  request_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'}

  page_index = 1
  cities_data: list[dict] = []

  while True:
    print(f'Page number {page_index}')
    url = city_base_url.format(page_index)
    # NOTE(review): verify=False turns off TLS certificate validation; kept from the
    # original because the reason for it is not visible here - confirm it is still needed.
    # The timeout keeps a stalled server from hanging the cell forever.
    response = requests.get(url, headers=request_headers, verify=False, timeout=30)

    if response.status_code != 200:
      # Raise instead of exit(): the failure shows up as a traceback and does not
      # terminate the interpreter/kernel that runs the notebook.
      raise RuntimeError(
          f'Invalid response code while fetching cities: {response.status_code} ({url})')

    soup = BeautifulSoup(response.content, 'html.parser')

    if TERMINATING_REGEX in soup.text:
      break

    # The first <tr> is the header row; a page without data rows also ends the scrape
    # (the original raised IndexError on pop(0) or kept paging without end).
    data_rows = soup.find_all('tr')[1:]
    if not data_rows:
      break

    cities_data.extend(handle_city_soup(row) for row in data_rows)
    page_index += 1

  return cities_data

# Run the scrape now; `cities_data` is the list that the DB-writing cell below inserts.
cities_data = handle_cities_page()

In [None]:
# Display the scraped city records.
cities_data

### Writing cities to DB

In [None]:
connector = create_mysql_connection()
if connector is None:
    # create_mysql_connection prints the driver error and returns None on failure.
    raise RuntimeError('Could not connect to MySQL; cities were not written')
cursor = connector.cursor()

table_name = 'city'

try:
    # Nothing to insert (and no first record to derive the columns from) when the scrape is empty.
    if cities_data:
        # Column names come from the keys produced by handle_city_soup, not from user input.
        columns = ', '.join(cities_data[0].keys())
        placeholders = ', '.join(['%s'] * len(cities_data[0]))
        query = f'INSERT INTO {table_name} ({columns}) VALUES ({placeholders})'

        cursor.executemany(query, [tuple(city_data.values()) for city_data in cities_data])
        connector.commit()
finally:
    # Release the connection even when the insert fails.
    close_mysql_connection(connector, cursor)

## Images of cities
Run manually, because the API provider enforces a call limit.

### Get links from Unsplash

In [None]:
def get_city_photo_link(city: str) -> Optional[str]:
  """Search Unsplash for ``city`` and return the raw URL of the first photo.

  Args:
    city: search term sent as the ``query`` parameter.

  Returns:
    The ``urls.raw`` value of the first search result, or ``None`` when the
    API answers with a status code other than 200.

  Raises:
    IndexError: if the search returned no results.
  """
  url = 'https://api.unsplash.com/search/photos'
  response = requests.get(
      url,
      params={'page': 1, 'query': city},
      headers={'Authorization': 'Client-ID replace'},
      timeout=30,  # do not hang forever on a stalled connection
  )

  if response.status_code != 200:
    return None

  results = response.json().get('results') or []
  if not results:
    # Same exception type as indexing the empty list, but it names the city.
    raise IndexError(f'No Unsplash results for {city!r}')
  return results[0]['urls']['raw']

In [None]:
connector = create_mysql_connection()
if connector is None:
    # create_mysql_connection prints the driver error and returns None on failure.
    raise RuntimeError('Could not connect to MySQL; cannot read cities without a photo link')
cursor = connector.cursor()

# Cities whose link_api column is still empty.
cursor.execute("SELECT * FROM city WHERE link_api IS NULL;")
result = cursor.fetchall()

1

In [None]:
empty_pic_city_data = result
query = "UPDATE city SET link_api = %s WHERE id = %s;"

try:
  for city in empty_pic_city_data:
    # city[0] is passed as the id and city[1] as the search term (name).
    try:
      photo_link = get_city_photo_link(city[1])
    except IndexError:
      # The search returned no photo for this city: leave link_api empty and go on.
      print(f'No photo found for {city[1]}')
      continue

    # None means the API did not answer with 200 (e.g. the call limit was reached):
    # stop here; the remaining cities are picked up by the next manual run.
    # This check must come before len(), which raises TypeError on None.
    if photo_link is None:
      break

    print(len(photo_link))
    print(photo_link)

    cursor.execute(query, (photo_link, city[0]))
    # Commit per city so links already fetched survive a later failure.
    connector.commit()
finally:
  # Release the connection even when a request or an update fails.
  close_mysql_connection(connector, cursor)

### Upload to bucket

In [None]:
connector = create_mysql_connection()
if connector is None:
    # create_mysql_connection prints the driver error and returns None on failure.
    raise RuntimeError('Could not connect to MySQL; cannot read the city table')
cursor = connector.cursor()

# All city rows; the connection stays open for the link_bucket update further below.
cursor.execute("SELECT * FROM city;")
result = cursor.fetchall()
cities = result

#### Access to storage client - bucket

In [None]:
def download_image(url: str):
  """Download an image and re-encode it as JPEG in memory.

  Args:
    url: address of the image to fetch.

  Returns:
    io.BytesIO positioned at offset 0 that holds the JPEG-encoded image.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
  """
  response = requests.get(url, timeout=60)
  # Fail on HTTP errors here rather than handing an error page to Pillow.
  response.raise_for_status()

  img = Image.open(BytesIO(response.content))
  # JPEG cannot store alpha or palette images (e.g. RGBA / P sources), so convert those first.
  if img.mode not in ('RGB', 'L', 'CMYK'):
    img = img.convert('RGB')

  image_data = BytesIO()
  img.save(image_data, format='JPEG')
  # Rewind so the caller can read/upload from the start of the buffer.
  image_data.seek(0)

  return image_data

def upload_image_to_bucket(city_object: tuple, bucket) -> str:
  """Download a city's image and store it in the bucket as ``<id>.jpg``.

  Args:
    city_object: city row; index 0 is used for the file name and index 4 is
      passed to download_image as the source URL.
    bucket: bucket object exposing ``blob(name)`` and ``name``.

  Returns:
    The ``https://storage.googleapis.com/<bucket>/<file>`` URL of the uploaded object.
  """
  img = download_image(city_object[4])
  file_name = f'{city_object[0]}.jpg'
  blob = bucket.blob(file_name)
  blob.upload_from_file(img, content_type='image/jpeg')
  # Take the name from the bucket that was actually used instead of the global
  # `bucket_name`, which is only defined in a later cell.
  return f'https://storage.googleapis.com/{bucket.name}/{file_name}'

In [None]:
# Path of the service-account key file the credentials are loaded from.
key_path = 'keys/service_acc_key.json'
# Name of the bucket the handle below points to.
bucket_name = 'mariia_city_weather_project'
credentials = service_account.Credentials.from_service_account_file(key_path)
storage_client = storage.Client(credentials=credentials)
# Bucket handle that the upload loop passes to upload_image_to_bucket.
bucket = storage_client.bucket(bucket_name)

In [None]:
# Maps city id -> URL returned by upload_image_to_bucket.
city_img_links = {}
for city in cities:
  # city[4] is the source URL that upload_image_to_bucket downloads; a row without one
  # (e.g. its photo link was never fetched) would crash the download, so skip it.
  if not city[4]:
    print(f'Skipping city {city[0]}: no source image link')
    continue
  city_img_links[city[0]] = upload_image_to_bucket(city, bucket)

In [None]:
# Display the id -> bucket URL mapping built by the upload loop.
city_img_links

In [None]:
# The statement is the same for every row, so build it once.
query = 'UPDATE city SET link_bucket = %s WHERE id = %s'
try:
  # key is the city id, value the bucket URL of its image.
  for key, value in city_img_links.items():
    cursor.execute(query, (value, key))
  connector.commit()
finally:
  # Release the connection even when an update fails.
  close_mysql_connection(connector, cursor)

## Weather, scheduled by cron

Cron line

`0 * * * * /usr/bin/python3 /home/myusername/fetch_weather.py >> /home/myusername/last_run.txt`

In [None]:
def get_city_weather(lat: float, lon: float) -> dict:
  """Fetch the current weather for a coordinate from the OpenWeatherMap API.

  Args:
    lat: latitude sent as the ``lat`` parameter.
    lon: longitude sent as the ``lon`` parameter.

  Returns:
    dict with the keys ``weather_type``, ``temp``, ``temp_feels``, ``max_temp``,
    ``min_temp``, ``humidity``, ``visibility`` (``None`` when the response has
    no such field) and ``dt`` (naive ``datetime`` in UTC). ``units=metric`` is
    requested from the API.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  # Local import so the function does not depend on an extra top-level import.
  from datetime import timezone

  url = 'https://api.openweathermap.org/data/2.5/weather'

  params = {
    'lat': lat,
    'lon': lon,
    'appid': 'replace',
    'units': 'metric'
  }

  response = requests.get(url, params=params, timeout=30)
  # An error payload has no 'weather'/'main' keys; fail with the HTTP error instead of a KeyError.
  response.raise_for_status()
  weather_data = response.json()
  main_block = weather_data['main']

  return {
    'weather_type': weather_data['weather'][0]['main'],
    'temp': main_block['temp'],
    'temp_feels': main_block['feels_like'],
    'max_temp': main_block['temp_max'],
    'min_temp': main_block['temp_min'],
    'humidity': main_block['humidity'],
    'visibility': weather_data.get('visibility'),
    # Same naive-UTC value as the deprecated datetime.utcfromtimestamp().
    'dt': datetime.fromtimestamp(weather_data['dt'], tz=timezone.utc).replace(tzinfo=None)
  }

In [None]:
import os

import mysql.connector

# Credentials are read from the environment so that no password or host address is stored
# in the notebook/script. MYSQL_PASSWORD and MYSQL_HOST are required (KeyError when unset);
# set them for the cron job as well.
connector = mysql.connector.connect(
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ['MYSQL_PASSWORD'],
    host=os.environ['MYSQL_HOST'],
    database=os.environ.get('MYSQL_DATABASE', 'city_weather'),
)

cursor = connector.cursor()
try:
    cursor.execute("SELECT * FROM city;")
    cities_data = cursor.fetchall()
finally:
    # The rows are in memory now; the insert step opens its own connection,
    # so this one is closed instead of being left open.
    cursor.close()
    connector.close()

In [None]:
# One record per city row: the city id followed by the fields from get_city_weather.
city_weather_data: list[dict] = []

for city_row in cities_data:
  # Index 2 and 3 are passed as latitude and longitude, index 0 is stored as city_id.
  weather_fields = get_city_weather(city_row[2], city_row[3])
  record = {'city_id': city_row[0]}
  record.update(weather_fields)
  city_weather_data.append(record)

In [None]:
# Display the first collected record.
city_weather_data[0]

{'city_id': 1,
 'weather_type': 'Clouds',
 'temp': 16.23,
 'temp_feels': 15.61,
 'max_temp': 16.23,
 'min_temp': 16.23,
 'humidity': 65,
 'visibility': 10000,
 'dt': datetime.datetime(2024, 1, 17, 14, 33, 3)}

In [None]:
import os

# Credentials are read from the environment so that no password or host address is stored
# in the notebook/script. MYSQL_PASSWORD and MYSQL_HOST are required (KeyError when unset);
# set them for the cron job as well.
connector = mysql.connector.connect(
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ['MYSQL_PASSWORD'],
    host=os.environ['MYSQL_HOST'],
    database=os.environ.get('MYSQL_DATABASE', 'city_weather'),
)

cursor = connector.cursor()

table_name = 'weather'

try:
    # Nothing to insert (and no first record to derive the columns from) when no weather was collected.
    if city_weather_data:
        columns = ', '.join(city_weather_data[0].keys())
        # Derive the placeholder count from the record instead of hard-coding 9.
        placeholders = ', '.join(['%s'] * len(city_weather_data[0]))
        query = f'INSERT INTO {table_name} ({columns}) VALUES ({placeholders})'

        cursor.executemany(query, [tuple(weather.values()) for weather in city_weather_data])
        connector.commit()
finally:
    # Release the connection even when the insert fails.
    cursor.close()
    connector.close()

print('All good')