In [21]:
## The only packages we need for this pipeline are `Requests`, `os`, `Pandas`, `SQLite`, and `Streamlit`
## (this cell also imports the standard-library `json` and `re` modules).
## Requests makes the HTTP requests that fetch data from the REST API endpoints,
## and Pandas handles data transformation and wrangling.

import requests
import pandas as pd
import os
import json
import re

## API key for the weather service, read from the environment so that it is
## never stored in the notebook itself. `os.getenv` yields None when unset.
api_key = os.getenv('API_KEY')

## Fetch the user directory from the JSONPlaceholder REST API.
users_response = requests.get(
    "https://jsonplaceholder.typicode.com/users",
    timeout=10,  # seconds; without a timeout a stalled connection blocks the kernel indefinitely
)
## Surface HTTP errors here rather than as a confusing missing-column error below.
users_response.raise_for_status()

## Flatten the nested JSON (nested keys are joined with "_", e.g. address_geo_lat).
users_raw = pd.json_normalize(users_response.json(), sep="_")

## Keep only the identity and geo columns, rename them to the names the rest of
## the pipeline joins on, and cast the coordinates to float so they are numeric.
users = (
    users_raw[["id",
               "name",
               "username",
               "email",
               "address_geo_lat",
               "address_geo_lng"]]
    .rename(columns={
        "id": "customer_id",
        "address_geo_lat": "lat",
        "address_geo_lng": "lon",
    })
    .astype({"lat": float, "lon": float})
)


## Sales records joined to their customer's profile.
## The join is pandas' default inner join on `customer_id`, so only sales whose
## customer appears in `users` are kept.
merged = pd.merge(
    pd.read_csv("./data/sales_data.csv", parse_dates=["order_date"]),
    users,
    on="customer_id",
)


## Look up the weather at every customer's coordinates and collect one flat
## record per customer, tagged with `customer_id` so it can be joined to sales.

if not api_key:
    ## Without a key every request would come back as an error payload.
    raise RuntimeError("API_KEY environment variable is not set; cannot query the weather API")

weather_records = []

for customer in users[["customer_id", "lat", "lon"]].itertuples(index=False):
    weather_response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        ## Let requests build and URL-encode the query string.
        params={
            "appid": api_key,
            "lon": customer.lon,
            "lat": customer.lat,
            "units": "metric",
        },
        timeout=10,  # seconds; avoid hanging the kernel on a stalled connection
    )
    ## Fail loudly on HTTP errors instead of appending an error payload as a row.
    weather_response.raise_for_status()

    payload = weather_response.json()
    payload["customer_id"] = customer.customer_id

    ## `weather` arrives as a list of condition objects. Keep the first one as a
    ## plain dict so json_normalize expands it into weather_* columns. This
    ## replaces stripping "[" and "]" from the serialised JSON, which corrupted
    ## any string containing brackets and produced invalid JSON for lists that
    ## did not hold exactly one entry.
    conditions = payload.get("weather")
    if isinstance(conditions, list):
        if conditions:
            payload["weather"] = conditions[0]
        else:
            payload.pop("weather")

    weather_records.append(payload)

## One row per customer; nested keys are joined with "_" (e.g. weather_main).
weather = pd.json_normalize(weather_records, sep="_")
weather

Unnamed: 0,base,visibility,dt,timezone,id,name,cod,customer_id,coord_lon,coord_lat,...,main_sea_level,main_grnd_level,wind_speed,wind_deg,wind_gust,clouds_all,sys_sunrise,sys_sunset,snow_1h,rain_1h
0,stations,10000,1685975780,18000,0,,200,1,81.1496,-37.3159,...,1032,1032,4.2,217,4.51,34,1685929366,1685964307,,
1,stations,10000,1685975781,-7200,0,,200,2,-34.4618,-43.9509,...,999,999,4.39,310,4.61,21,1685958317,1685990857,,
2,stations,131,1685975802,-10800,0,,200,3,-47.0653,-68.6102,...,999,999,6.77,36,11.21,100,0,0,0.56,
3,stations,10000,1685975803,-39600,0,,200,4,-164.299,29.4572,...,1017,1017,7.16,299,8.41,100,1685980639,1686030864,,
4,stations,10000,1685975804,14400,0,,200,5,62.5342,-31.8129,...,1030,1030,6.78,57,7.81,100,1685933015,1685969595,,
5,stations,10000,1685975804,18000,0,,200,6,71.7478,-71.4197,...,995,937,3.72,190,5.11,94,0,0,,
6,stations,10000,1685975805,7200,0,,200,7,21.8984,24.8918,...,1003,966,2.22,129,2.81,7,1685936556,1685985562,,
7,stations,10000,1685975806,-28800,0,,200,8,-120.7677,-14.399,...,1014,1014,3.04,59,3.11,41,1685974947,1686015658,,0.13
8,stations,10000,1685975786,-36000,0,,200,9,-168.8889,24.6463,...,1020,1020,3.59,49,3.51,93,1685982373,1686031333,,
9,stations,10000,1685975786,14400,0,,200,10,57.2232,-38.2386,...,1021,1021,10.68,333,17.21,100,1685935261,1685969898,,


In [40]:
## Sale value of each order line (price x quantity).
## `assign` overwrites the column, so re-running this cell gives the same result.
merged = merged.assign(sale_value=lambda sales: sales["price"] * sales["quantity"])

## NOTE: the value columns are selected explicitly before aggregating. A string
## passed positionally to groupby `.sum()` / `.mean()` is taken as the
## `numeric_only` argument, not as a column name.

## Total sales by customer
## Grouping on both keys keeps customer_id as a label instead of summing it.
total_sales_by_customer = (
    merged
    .groupby(["customer_id", "name"])[["sale_value"]]
    .sum()
)

## Average order quantity
avg_quantity_by_product = (
    merged
    .groupby("product_id")[["quantity"]]
    .mean()
)

## Highest sales generating products
top_products_by_sales = (
    merged
    .groupby("product_id")[["sale_value"]]
    .sum()
    .sort_values("sale_value", ascending=False)
    .head(10)
)

## Series of sales volume by month and year
monthly_sales = (
    merged
    .groupby(merged["order_date"].dt.to_period("M"))[["sale_value"]]
    .sum()
)

## Average sale value by the weather condition (`weather_main`) recorded for the customer
avg_sale_by_weather = (
    merged[["customer_id", "sale_value"]]
    .merge(weather[["weather_main", "customer_id"]], on="customer_id")
    .groupby("weather_main")[["sale_value"]]
    .mean()
)

## Only the last expression of a cell is rendered; the other summaries are
## bound to names above so they can be inspected in their own cells.
avg_sale_by_weather

Unnamed: 0_level_0,sale_value
weather_main,Unnamed: 1_level_1
Clear,312.3675
Clouds,310.748786
Rain,300.568396
Snow,304.105882
