### Imports

In [6]:
import pandas as pd
import numpy as np
from pymongo import MongoClient, collection
from datetime import datetime, timezone

### Functions

In [8]:
def create_connection(database: str, collection: str) -> collection.Collection:
    """Open a default ``MongoClient`` and hand back one of its collections.

    Parameters
    ----------
    database : str
        Name of the database to look up on the client.
    collection : str
        Name of the collection to look up inside ``database``.

    Returns
    -------
    pymongo.collection.Collection
        Handle for the requested collection.
    """
    # MongoClient() is called without arguments, so pymongo's default
    # connection settings are used.
    client = MongoClient()
    db = client[database]
    return db[collection]


def create_dataframe(coll: "collection.Collection", pipeline: "list | None" = None) -> pd.DataFrame:
    """Run an aggregation pipeline on ``coll`` and load the result into a DataFrame.

    Parameters
    ----------
    coll : pymongo.collection.Collection
        Collection to query; only its ``.aggregate()`` method is used.
    pipeline : list of dict, optional
        Aggregation stages passed to ``coll.aggregate``. When omitted, an
        empty stage list is sent.

    Returns
    -------
    pd.DataFrame
        One row per document yielded by the aggregation cursor.
    """
    # The previous default was the built-in ``dict`` *class* (``pipeline=dict``),
    # which is not a valid pipeline; fall back to an empty list of stages instead.
    stages = [] if pipeline is None else pipeline
    cursor = coll.aggregate(pipeline=stages)
    return pd.DataFrame(cursor)


### Data

In [9]:
# Lower bound (UTC) applied to "Funnel Create Date" in the $match stage.
created_from = datetime(2022, 1, 1, tzinfo=timezone.utc)

# Stage 1: overwrite "Funnel Create Date" with its $toDate conversion so the
# $gte comparison below is made against a date value.
parse_create_date = {
    "$set": {"Funnel Create Date": {"$toDate": "$Funnel Create Date"}}
}

# Stage 2: keep documents created on/after the cutoff whose status is "Active".
keep_active_recent = {
    "$match": {
        "Funnel Create Date": {"$gte": created_from},
        "Funn Status": "Active",
    }
}

pipeline = [parse_create_date, keep_active_recent]

coll = create_connection("deep-diver", "boreport")
df = create_dataframe(coll, pipeline)

### Cleaning

The required format:

![salesdata](salesdata.png)

In [55]:
def adjust_columns(df):
    """Aggregate raw funnel rows into weekly counts per segment.

    Rows are grouped by channel, state, cluster, package and contract period.
    Within each group, "Funnel SO No" entries are counted per "W-SUN" weekly
    bin of "Funnel Create Date". The result is a flat frame whose columns are
    renamed to: channel, geography, geography_breakdown, product, contract,
    date, quantity.
    """
    date_col = "Funnel Create Date"
    count_col = "Funnel SO No"
    segment_cols = [
        " Channel",
        "Blk State",
        "Blk Cluster",
        "Funnel Fact.Package",
        "Funn Monthcontractperiod",
    ]
    output_names = {
        " Channel": "channel",
        "Blk State": "geography",
        "Blk Cluster": "geography_breakdown",
        "Funnel Fact.Package": "product",
        "Funn Monthcontractperiod": "contract",
        date_col: "date",
        count_col: "quantity",
    }

    # Keep only the columns the aggregation needs.
    selected = df[
        [
            date_col,
            count_col,
            "Funnel Fact.Package",
            "Blk Cluster",
            "Blk State",
            " Channel",
            "Funn Monthcontractperiod",
        ]
    ]

    # resample() works on the index, so the date column has to become the index first.
    by_date = selected.set_index(date_col)
    weekly_bins = by_date.groupby(segment_cols).resample("W-SUN")
    weekly_counts = weekly_bins[count_col].count()

    flat = weekly_counts.reset_index()
    return flat.rename(columns=output_names)

In [56]:
# Weekly "Funnel SO No" counts per channel / state / cluster / package / contract period.
df0 = adjust_columns(df)

In [70]:
# Contract period in months -> label; any other value is reported as "Others".
CONTRACT_LABELS = {24: "24 Months", 12: "12 Months", 0: "Contractless"}

(
    df0
    .assign(
        # Normalise casing and surrounding whitespace of the categorical text columns.
        channel=lambda d: d["channel"].str.title().str.strip(),
        geography=lambda d: d["geography"].str.title().str.strip(),
        geography_breakdown=lambda d: d["geography_breakdown"].str.title().str.strip(),
        # Compare numerically so 24, 24.0 and "24" all get the same label.
        # Comparing the str() form sent float-typed values ("24.0") to "Others".
        # Values that are not numbers become NaN and end up as "Others".
        contract=lambda d: (
            pd.to_numeric(d["contract"], errors="coerce")
            .map(CONTRACT_LABELS)
            .fillna("Others")
        ),
    )
    .sort_values(by="date")
)

 

Unnamed: 0,channel,geography,geography_breakdown,product,contract,date,quantity
123556,Online,Pulau Pinang,Tanjung Tokong,TIME Fibre 1Gbps Home Broadband (UNLIMITED) + ...,24 Months,2022-01-02,2
44824,Dealer,Selangor,Setia Alam,TIME Fibre 1Gbps Home Broadband (UNLIMITED) + ...,24 Months,2022-01-02,1
5469,Dealer,Johor,Permas Jaya,TIME Fibre 100Mbps Home Broadband (UNLIMITED) ...,24 Months,2022-01-02,1
53739,Dealer,Wilayah Persekutuan Kuala Lumpur,Bangsar South,TIME Fibre 500Mbps Home Broadband (UNLIMITED) ...,24 Months,2022-01-02,3
123324,Online,Pulau Pinang,Tanjung Tokong,TIME Fibre 100Mbps Home Broadband (UNLIMITED) ...,24 Months,2022-01-02,1
...,...,...,...,...,...,...,...
129761,Online,Selangor,Cyberjaya,TIME Fibre 600Mbps Home Broadband (UNLIMITED) ...,24 Months,2024-10-06,7
9898,Dealer,Negeri Sembilan,Nilai,TIME Fibre 600Mbps Home Broadband (UNLIMITED) ...,24 Months,2024-10-06,29
74711,Dealer,Wilayah Persekutuan Kuala Lumpur,Seputeh,TIME Fibre 1Gbps Home Broadband (UNLIMITED) + ...,24 Months,2024-10-06,1
75387,Dealer,Wilayah Persekutuan Kuala Lumpur,Setapak,TIME Fibre 200Mbps Home Broadband (UNLIMITED) ...,24 Months,2024-10-06,36


# TODO: finish — see https://docs.google.com/presentation/d/1Ogjir_OzpYrzLm6-yY7PVzB1_2Ig42SfUATnPGdAA44/edit#slide=id.g28f6c08b5e1_0_543