In [207]:
## The only packages needed for this pipeline are `Requests`, `os`, `Pandas`, `SQLite`, and `Streamlit`
## (plus the standard-library `json` and `re` modules imported below).
## Requests makes the HTTP requests that fetch data from the REST API endpoints,
## and Pandas handles data transformation and wrangling.

import requests
import pandas as pd
import os
import json
import re

# API key sent as `appid` to the OpenWeatherMap endpoint further down.
# Read from the environment so it is never hard-coded; None when API_KEY is unset.
api_key = os.environ.get("API_KEY")

# Fetch the user directory and keep only the fields this pipeline uses.
# The timeout stops the notebook from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error here rather than as a confusing
# KeyError when the expected columns are selected below.
users_response = requests.get(
    "https://jsonplaceholder.typicode.com/users",
    timeout=30,
)
users_response.raise_for_status()

# sep="_" joins nested JSON keys with underscores, which is where the
# "address_geo_lat" / "address_geo_lng" column names come from.
users = (
    pd.json_normalize(users_response.json(), sep="_")
    [["id", "name", "username", "email", "address_geo_lat", "address_geo_lng"]]
    .rename(
        columns={
            "id": "customer_id",  # join key used when merging with the sales data
            "address_geo_lat": "lat",
            "address_geo_lng": "lon",
        }
    )
)

# Make sure both coordinate columns are numeric (float) before they are used
# in the merge and in the weather lookups.
users = users.astype({"lat": float, "lon": float})


# Attach each order's customer profile to the order row.
# - how="inner" (the pandas default, spelled out): orders whose customer_id is
#   not present in `users` are dropped.
# - validate="many_to_one": raises pandas.errors.MergeError if `users` ever
#   contains a duplicated customer_id, which would otherwise silently duplicate
#   order rows and inflate every sales total computed from `merged`.
merged = (
    pd.read_csv("./data/sales_data.csv", parse_dates=["order_date"])
    .merge(users, how="inner", on="customer_id", validate="many_to_one")
)


def _first_of_lists(node):
    """Return `node` with every list replaced by its first element.

    An empty list becomes None. Dicts are rebuilt with the same keys in the
    same order; any other value is returned unchanged.

    For lists holding exactly one item this gives the same result as deleting
    the "[" and "]" characters from the serialised JSON (the previous
    approach), but it never touches bracket characters inside string values
    and it cannot produce invalid JSON when a list holds zero or several items.
    """
    if isinstance(node, list):
        return _first_of_lists(node[0]) if node else None
    if isinstance(node, dict):
        return {key: _first_of_lists(value) for key, value in node.items()}
    return node


# One weather lookup per customer, keyed by the customer's coordinates.
weather_records = []

for customer in users[["customer_id", "lat", "lon"]].itertuples(index=False):
    weather_response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        # Let requests build and URL-encode the query string.
        params={
            "appid": api_key,
            "lon": customer.lon,
            "lat": customer.lat,
            "units": "metric",
        },
        timeout=30,
    )
    # Fail loudly on a bad/missing API key or rate limiting instead of
    # appending the error payload as if it were a weather observation.
    weather_response.raise_for_status()

    record = _first_of_lists(weather_response.json())
    record["customer_id"] = customer.customer_id  # lets the row be joined back to `users`
    weather_records.append(record)

# Nested keys are flattened with "_" so each response becomes a single row.
weather = pd.json_normalize(weather_records, sep="_")

In [208]:
## Total sales by customer

# Group on customer_id together with name: the id stays a key instead of being
# summed into a meaningless number, and two customers who happen to share a
# name are not merged into one row.
# display() is needed because Jupyter only renders the last bare expression of
# a cell, and this is not the last one. (`display` is provided by the kernel.)
display(
    merged
    .assign(sales=lambda orders: orders["price"] * orders["quantity"])
    .groupby(["customer_id", "name"], as_index=False)["sales"]
    .sum()
)

## Average order quantity (per product)

# Select the column before aggregating rather than passing its name to
# .mean(), whose first positional parameter is `numeric_only`, not a column.
# display() is needed because Jupyter only renders the last bare expression of
# a cell, and this is not the last one. (`display` is provided by the kernel.)
display(
    merged
    .groupby("product_id")[["quantity"]]
    .mean()
)

## Highest sales generating products

# Top 10 products by revenue (price * quantity summed per product).
# Only `sales` is aggregated: the column is selected explicitly instead of
# passing "sales" to .sum(), whose first positional parameter is
# `numeric_only`, not a column name.
(merged
 .assign(sales=lambda orders: orders["price"] * orders["quantity"])
 .groupby("product_id")[["sales"]]
 .sum()
 .sort_values("sales", ascending=False)
 .head(10))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   order_id     1000 non-null   int64         
 1   customer_id  1000 non-null   int64         
 2   product_id   1000 non-null   int64         
 3   quantity     1000 non-null   int64         
 4   price        1000 non-null   float64       
 5   order_date   1000 non-null   datetime64[ns]
 6   name         1000 non-null   object        
 7   username     1000 non-null   object        
 8   email        1000 non-null   object        
 9   lat          1000 non-null   float64       
 10  lon          1000 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(4), object(3)
memory usage: 86.1+ KB


Unnamed: 0,order_id,customer_id,product_id,quantity,price,order_date,name,username,email,lat,lon
0,2334,5,40,3,35.60,2022-06-21,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,-31.8129,62.5342
1,6588,5,26,1,15.87,2022-10-23,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,-31.8129,62.5342
2,3569,5,47,9,19.35,2023-05-25,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,-31.8129,62.5342
3,5200,5,50,6,82.12,2023-02-28,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,-31.8129,62.5342
4,1589,5,10,1,96.94,2023-01-05,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,-31.8129,62.5342
...,...,...,...,...,...,...,...,...,...,...,...
995,9985,6,2,10,24.47,2023-02-21,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,-71.4197,71.7478
996,7971,6,11,9,82.94,2022-11-01,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,-71.4197,71.7478
997,9182,6,38,6,18.15,2023-04-22,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,-71.4197,71.7478
998,7967,6,34,10,14.44,2022-07-26,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,-71.4197,71.7478
