# Connect to database

## Set credentials

In [1]:
import os
import psycopg2 # PostgreSQL database adapter for Python
from dotenv import load_dotenv # Reads the key-value pair from .env file and adds them to environment variable
import pandas as pd

# Populate the process environment from the local .env file
load_dotenv()

# Database connection settings; os.getenv yields None for any variable that is not set
db_host, db_name, db_user, db_password, db_port = (
    os.getenv(variable)
    for variable in ("DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT")
)

## Establish connection

In [2]:
# Open the PostgreSQL connection from the settings read out of the environment
connection_settings = {
    "host": db_host,
    "dbname": db_name,
    "user": db_user,
    "password": db_password,
    "port": db_port,
}
conn = psycopg2.connect(**connection_settings)

## Weather data EDA

### Read weather database into dataframe

In [3]:
# Select every row and column of the agg.t_weather table
query = """
    SELECT * 
    FROM agg.t_weather  
"""

# Run the query over the open connection and load the result into a DataFrame.
# NOTE(review): pandas may warn when handed a raw DBAPI connection such as
# psycopg2's; an SQLAlchemy engine would avoid that — confirm before switching.
df = pd.read_sql_query(sql=query, con=conn)

  df = pd.read_sql_query(query, conn)


In [23]:
# Preview the loaded weather table (the rendering below is abbreviated with a "..." row)
df

Unnamed: 0,postcode,created_ts,forecast_date,forecast_hour,precipitation,precipitation_probability,wind_direction,wind_speed,solar_radiation,sunshine_duration,site
0,PL12,2019-03-23 00:11:24,2019-03-22,0,0.0,9.0,151,8.0,0.0,0.0,"{7,44,47,60}"
1,PL14,2019-03-23 00:10:49,2019-03-22,0,0.0,100.0,161,7.0,0.0,0.0,"{33,35}"
2,PL15,2019-03-23 00:10:46,2019-03-22,0,0.0,60.0,175,6.0,0.0,0.0,{28}
3,PL17,2019-03-23 00:11:21,2019-03-22,0,0.0,9.0,151,8.0,0.0,0.0,{45}
4,PL24,2019-03-23 00:11:23,2019-03-22,0,0.0,100.0,170,6.0,0.0,0.0,"{49,52}"
...,...,...,...,...,...,...,...,...,...,...,...
244123,TR4,2020-03-29 05:06:16,2020-03-29,23,0.0,1.0,49,14.0,0.0,0.0,"{58,62,80,84}"
244124,TR5,2020-03-29 05:06:17,2020-03-29,23,0.0,1.0,49,14.0,0.0,0.0,"{71,86,87,99}"
244125,TR7,2020-03-29 05:05:07,2020-03-29,23,0.0,1.0,44,14.0,0.0,0.0,"{4,13,56,63,69,92,95}"
244126,TR8,2020-03-29 05:05:03,2020-03-29,23,0.0,1.0,44,14.0,0.0,0.0,"{21,22}"


## Check the number of unique postcodes

In [5]:
# Distinct postcode values, in order of first appearance in the table
df["postcode"].unique()

array(['PL12', 'PL14', 'PL15', 'PL17', 'PL24', 'PL25', 'PL26', 'PL27',
       'PL28', 'PL30', 'PL31', 'TR1', 'TR10', 'TR11', 'TR12', 'TR13',
       'TR14', 'TR16', 'TR18', 'TR26', 'TR27', 'TR3', 'TR4', 'TR5', 'TR7',
       'TR8', 'TR9', 'TR15', 'TR2', 'TR20'], dtype=object)

In [6]:
# Number of distinct postcode values; dropna=False keeps the count equal to
# the length of .unique(), which would include a missing value if present
df["postcode"].nunique(dropna=False)

30

## Visualise these postcodes

In [9]:
import requests

def get_coordinates(postcodes, timeout=10):
    """Look up latitude/longitude for a batch of postcodes via postcodes.io.

    Sends one bulk POST request to ``https://api.postcodes.io/postcodes`` and
    maps every query string echoed back by the API to its coordinates.

    NOTE(review): in this notebook the outward codes 'PL12', 'PL14' and
    'PL15' all came back as ``None``, so the bulk endpoint presumably only
    matches full postcodes — confirm against the postcodes.io documentation.

    Args:
        postcodes: Iterable of postcode strings to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict mapping each queried string to a ``(latitude, longitude)`` tuple,
        or to ``None`` when the API returned no match for it. An empty input
        yields an empty dict without contacting the API.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    postcodes = list(postcodes)
    if not postcodes:
        # Nothing to look up; skip the network round-trip entirely.
        return {}

    response = requests.post(
        "https://api.postcodes.io/postcodes",
        json={"postcodes": postcodes},
        timeout=timeout,  # without a timeout a stalled request would hang the kernel
    )
    # Surface HTTP errors directly instead of a KeyError on the missing 'result' key.
    response.raise_for_status()

    coordinates = {}
    for entry in response.json()["result"]:
        match = entry["result"]
        coordinates[entry["query"]] = (
            (match["latitude"], match["longitude"]) if match else None
        )
    return coordinates

# Try the lookup on three of the postcode values found in the weather table
postcodes = ["PL12", "PL14", "PL15"]
coordinates = get_coordinates(postcodes)
print(coordinates)

{'PL12': None, 'PL14': None, 'PL15': None}


In [10]:
## Ended up doing this in QGIS instead, since the lookup above returned None for every postcode tried.

## Check data availability for each postcode

In [22]:
# Count the weather rows available for each postcode (one output row per postcode)
postcode_counts = (
    df.groupby("postcode")
    .size()
    .rename("count")
    .reset_index()
)

# Save the counts; with pandas' defaults the integer index is written as an
# unnamed first column of the CSV
postcode_counts.to_csv("weather_data_availability.csv")