# Import the package

In [1]:
import requests
import pandas as pd

# Create User Functions

In [2]:
def fetch_weather_data(weather_api_url):
    '''
    This function get data from the weather API
    and returns the data as a json object
    Input = API url
    Output = data or an error message 
    '''
    try:
        response = requests.get(weather_api_url)
        # Raise an HTTPError if the HTTP request returned an unsuccessful status code
        response.raise_for_status()  
        weather_data_json = response.json()
        return weather_data_json
    except requests.HTTPError as http_error:
        print(f'HTTP error occurred in the dataset: {http_error}')
    except Exception as err:
        print(f'An error occurred in the dataset: {err}')

In [3]:
json = fetch_weather_data("https://api.open-meteo.com/v1/forecast?latitude=51.5085&longitude=-0.1257&hourly=temperature_2m,rain,showers,visibility&past_days=31")

In [4]:
def process_the_data(json):
    '''
    This function process the data 
    Input = json object
    Output = process data or an error message 
    '''
    try:
        # the structure of the data is as expected
        # the except will provide an error if not
        hourly_weather_data = json['hourly']
        df = pd.DataFrame(hourly_weather_data)
        print(df)
        # Convert the timestamp to datetime and set it as an index
        df['time'] = pd.to_datetime(df['time'])
        df.set_index('time', inplace=True)
        
        # Resample the data to daily and sum the required values
        daily_weather_df = df.resample('D').sum()
        return daily_weather_df
    
    except Exception as err:
        print(f'An error occurred during data processing: {err}')

In [5]:
df_process = process_the_data(json)
df_process.head()

                 time  temperature_2m  rain  showers  visibility
0    2023-12-26T00:00            10.5   0.0      0.0     24140.0
1    2023-12-26T01:00             9.9   0.0      0.0     24140.0
2    2023-12-26T02:00             9.5   0.0      0.0     24140.0
3    2023-12-26T03:00             8.8   0.0      0.0     24140.0
4    2023-12-26T04:00             8.3   0.0      0.0     24140.0
..                ...             ...   ...      ...         ...
907  2024-02-01T19:00            12.6   0.0      0.0     24140.0
908  2024-02-01T20:00            12.7   0.0      0.0     24140.0
909  2024-02-01T21:00            12.6   0.0      0.0     24140.0
910  2024-02-01T22:00            12.2   0.0      0.0     24140.0
911  2024-02-01T23:00            11.7   0.0      0.0     24140.0

[912 rows x 5 columns]


Unnamed: 0_level_0,temperature_2m,rain,showers,visibility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-12-26,187.2,0.1,0.0,577960.0
2023-12-27,247.3,6.6,0.0,537720.0
2023-12-28,272.1,0.0,0.1,575820.0
2023-12-29,226.2,0.1,0.3,568840.0
2023-12-30,224.0,0.6,0.0,558820.0


In [6]:
def save_file_as_parquet(df, file_name):
    '''
    This function save the file as a parquet file
    Input = DataFrame
    Output = Saved parquet file or an error message
    '''
    try:
        # Specify the compression method, or set it to None if you don't want any compression
        df.to_parquet('daily_weather.parquet', compression='gzip')  
        
    except Exception as err:
        print(f'An error occurred while saving the weather dataset: {err}')


In [7]:
save_file_as_parquet(df_process, 'daily_weather.parquet')


In [8]:
def main():
    '''
    This functions calls the other functions in 
    order of processing.
    It wraps and runs all the activity as a unit
    '''
    weather_api_url = "https://api.open-meteo.com/v1/forecast?latitude=51.5085&longitude=-0.1257&hourly=temperature_2m,rain,showers,visibility&past_days=31"
    weather_data = fetch_weather_data(weather_api_url)
    
    if weather_data:
        df_processed_data = process_the_data(weather_data)
        if not df_processed_data.empty:
            save_file_as_parquet(df_processed_data, 'daily_weather.parquet')
        else:
            print("There are no data to save.")
    else:
        print("The pipeline failed to fetch data.")

In [9]:
if __name__ == "__main__":
    main()

                 time  temperature_2m  rain  showers  visibility
0    2023-12-26T00:00            10.5   0.0      0.0     24140.0
1    2023-12-26T01:00             9.9   0.0      0.0     24140.0
2    2023-12-26T02:00             9.5   0.0      0.0     24140.0
3    2023-12-26T03:00             8.8   0.0      0.0     24140.0
4    2023-12-26T04:00             8.3   0.0      0.0     24140.0
..                ...             ...   ...      ...         ...
907  2024-02-01T19:00            12.6   0.0      0.0     24140.0
908  2024-02-01T20:00            12.7   0.0      0.0     24140.0
909  2024-02-01T21:00            12.6   0.0      0.0     24140.0
910  2024-02-01T22:00            12.2   0.0      0.0     24140.0
911  2024-02-01T23:00            11.7   0.0      0.0     24140.0

[912 rows x 5 columns]


# Import the data for testing

In [10]:
import pandas as pd

file_name = 'daily_weather.parquet' 

# Load the Parquet file
df = pd.read_parquet(file_name, engine='pyarrow')  # You can also use engine='fastparquet'

# Display sample data
df.sample(10)

Unnamed: 0_level_0,temperature_2m,rain,showers,visibility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-12-30,224.0,0.6,0.0,558820.0
2024-01-14,81.5,0.0,0.0,579360.0
2023-12-31,214.4,14.7,0.7,484580.0
2024-01-18,-16.4,0.0,0.0,579360.0
2024-01-31,227.8,1.3,0.0,579360.0
2024-01-06,132.9,0.0,0.2,579360.0
2023-12-29,226.2,0.1,0.3,568840.0
2024-01-21,218.8,5.3,0.0,516720.0
2024-01-30,243.9,0.0,0.0,579360.0
2024-01-16,2.3,0.0,0.0,579360.0


# Unit test with Pytest

In [11]:
import pytest

# Example test
def test_fetch_weather_data():
    url = "https://api.open-meteo.com/v1/forecast?latitude=51.5085&longitude=-0.1257&hourly=temperature_2m,rain,showers,visibility&past_days=31"
    ''' Use requests.get to return a known response and assert 
    that the data returns the correct data structure
    '''
    pass


def test_process_the_data():
    '''Provide some sample data and assert that the 
    transform_data aggregates data correctly
    '''
    pass


def test_save_file_as_parquet():
    ''' We will provide some sample data and use a mock file
    system or a temporary file to test save_data. In addition use 
    assert statement to ensure that the file is saved correctly
    '''
    pass


if __name__ == "__main__":
    pytest.main()

ERROR: usage: ipykernel_launcher.py [options] [file_or_dir] [file_or_dir] [...]
ipykernel_launcher.py: error: unrecognized arguments: -f
  inifile: None
  rootdir: C:\Users\bunmi

