In [51]:
## This pipeline needs only the third-party packages `requests`, `pandas`, and `streamlit`,
## plus standard-library modules such as `os` and `sqlite3`.
## `requests` makes the HTTP requests that fetch data from the REST API endpoints,
## and `pandas` handles the data transformations and wrangling.

import requests
import pandas as pd
import os
import json
import re

## API key sent as `appid` to the weather API further down; read from the
## environment so the secret never lives in the notebook itself.
api_key = os.getenv('API_KEY')

## Customer master data from the JSONPlaceholder demo API.
## A timeout keeps a stalled connection from hanging the kernel forever, and
## `raise_for_status()` stops the cell on an HTTP error instead of normalising
## an error payload into a malformed frame.
users_response = requests.get("https://jsonplaceholder.typicode.com/users",
                              timeout=30)
users_response.raise_for_status()

## `sep="_"` flattens the nested JSON into columns such as `address_geo_lat`.
## Only the identity fields and the coordinates are kept; `id` is renamed to
## `customer_id` so it can be used as the join key against the sales data.
users = (pd.json_normalize(users_response.json(), sep="_")
         [["id",
           "name",
           "username",
           "email",
           "address_geo_lat",
           "address_geo_lng"]]
         .rename(columns={"id": "customer_id",
                          "address_geo_lat": "lat",
                          "address_geo_lng": "lon"})
         ## Cast the coordinates to float inside the chain so the frame is
         ## built in one idempotent expression (no separate in-place update).
         .astype({"lat": float, "lon": float}))

## Load the sales export (parsing `order_date` as datetimes) and attach the
## customer attributes to every order row. The join is an inner join on
## `customer_id`, so orders without a matching user are dropped.
final = pd.read_csv(
    "./data/sales_data.csv",
    parse_dates=["order_date"],
).merge(
    users,
    on='customer_id',
    how='inner',
)

## Fetch the current weather at each customer's coordinates and build one flat
## row per customer, keyed by `customer_id`.

## Fail fast when the key is missing: otherwise every request would be sent
## with `appid=None` and the error payloads would be normalised into a frame
## that lacks the expected weather columns.
if not api_key:
    raise RuntimeError("API_KEY environment variable is not set")

weather_records = []

for _, row in users.iterrows():
    ## `params=` lets requests build and URL-encode the query string.
    weather_response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params={
            "appid": api_key,
            "lon": row["lon"],
            "lat": row["lat"],
            "units": "metric",
        },
        timeout=30,
    )
    weather_response.raise_for_status()
    res = weather_response.json()

    res["customer_id"] = row["customer_id"]

    ## The payload's `weather` field is a list of condition objects. Unwrap
    ## its first entry so `json_normalize` yields `weather_main`,
    ## `weather_description`, ... columns. This replaces stripping every
    ## "[" / "]" from the serialised JSON, which corrupts any text containing
    ## brackets and produces invalid JSON when the list has several entries.
    conditions = res.get("weather")
    if isinstance(conditions, list) and conditions:
        res["weather"] = conditions[0]

    weather_records.append(res)

## Nested objects (e.g. `main`, `wind`) become `main_temp`, `wind_speed`, ...
weather = pd.json_normalize(weather_records, sep="_")


In [49]:
## Monetary value of each order row: unit price times quantity.
## `assign` overwrites `sale_value` if it already exists, so re-running the
## cell is safe.
final = (final
 .assign(sale_value = lambda x: (x['price'] * x['quantity'])))

## NOTE: the aggregations below select the value column explicitly and call
## `.sum()` / `.mean()` without arguments. Passing a column name positionally
## binds it to `numeric_only`, which only works because a non-empty string is
## truthy. Results are bound to names because a notebook cell renders only its
## last expression.

## Total sales by customer
## Grouping on both keys keeps `customer_id` as an identifier instead of
## summing it as if it were a measure.
sales_by_customer = (final
 .groupby(['customer_id', 'name'], as_index=False)['sale_value']
 .sum())

## Average order quantity
avg_quantity_by_product = (final
 .groupby('product_id')[['quantity']]
 .mean())

## Highest sales generating products
top_products = (final
 .groupby('product_id')[['sale_value']]
 .sum()
 .sort_values('sale_value', ascending=False)
 .head(10))

## Series of sales volume by month and year
monthly_sales = (final
 .groupby(final["order_date"].dt.to_period('M'))[['sale_value']]
 .sum())

## Average sale value by weather condition
## (mean of `sale_value`, i.e. price x quantity, per `weather_main` category)
sales_by_weather = (final[["customer_id", "sale_value"]]
 .merge(weather[["weather_main", "customer_id"]], on='customer_id')
 .groupby("weather_main")[["sale_value"]]
 .mean())

## Last expression of the cell: this is the table that gets rendered.
sales_by_weather

Unnamed: 0_level_0,sale_value
order_date,Unnamed: 1_level_1
2022-06,25539.24
2022-07,28217.77
2022-08,25121.81
2022-09,26819.41
2022-10,25448.66
2022-11,22888.05
2022-12,29656.92
2023-01,22724.84
2023-02,27308.78
2023-03,24914.67


In [77]:
import sqlite3
import time


## Persist the enriched sales frame to a local SQLite database.
## NOTE(review): the file name looks like a misspelling of "transactions.db";
## it is kept unchanged because other code may open the database by this name.
con = sqlite3.connect("transasctions.db")

cur = con.cursor()

## Stage the frame as `sales_pandas`, replacing any previous copy. With the
## pandas default `index=True` the DataFrame index is also written as a column.
final.to_sql("sales_pandas", con, if_exists='replace')

## Rebuild `sales` from the staging table so re-running the cell always leaves
## a fresh copy.
cur.execute('''
    drop table if exists sales;
''')
            
cur.execute('''
    CREATE TABLE sales as
    select * from sales_pandas;
''')

## Commit explicitly instead of relying on sqlite3's implicit transaction
## handling. Ending the cell on this call (it returns None) also keeps the
## `<sqlite3.Cursor ...>` repr out of the cell output.
con.commit()

<sqlite3.Cursor at 0x14a45edc0>