In [3]:
import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ydata_profiling import ProfileReport
import warnings
# Notebook-wide configuration: warning filter, pandas options, plot theme.

# Hide UserWarning-category warnings so they do not clutter cell output.
# NOTE(review): this filter applies to the whole kernel session, so it also
# hides UserWarnings that might point at real data problems — narrow it with
# `module=` / `message=` if a specific library is the noisy one.
warnings.filterwarnings("ignore", category=UserWarning)

# pandas display limits: wide console output and up to 1000 rows/columns
# before a printed frame is truncated.
pd.set_option('display.width', 150)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

# Opt in to the future pandas behaviour of not silently downcasting results.
pd.set_option('future.no_silent_downcasting', True)

# Apply seaborn's default theme to all matplotlib figures. `set_theme()` is
# the preferred spelling; `sns.set()` is only an alias for it.
sns.set_theme()

In [4]:
# Timestamp columns that read_csv should parse into datetimes.
dates = ['session_start', 'session_end', 'session_date', 'order_dt']
df = pd.read_csv('ecom_go_2.csv', parse_dates=dates)

# Add the ISO week number of each session as its own column.
session_days = pd.to_datetime(df['session_date'])
df['week'] = session_days.dt.isocalendar().week

# polars version of the frame, built from `df` as it is at this point.
# NOTE(review): presumably it will not reflect later reassignments of
# columns on `df` — rebuild it after cleaning if it is used downstream.
df_pl = pl.from_pandas(df)

# Sanity check: dimensions on the first line, column names on the second.
print(df.shape, df.columns.tolist(), sep='\n')

(1009, 19)
['user_id', 'region', 'device', 'channel', 'session_start', 'session_end', 'sessiondurationsec', 'session_date', 'month', 'day', 'hour_of_day', 'order_dt', 'revenue', 'payment_type', 'promo_code', 'final_price', 'time_of_day', 'payer', 'week']


In [5]:
# Substitute two specific revenue values with corrected amounts in one pass.
# NOTE(review): 100000 and 1 look like data-entry errors or placeholders for
# the 9999 and 4999 amounts — confirm the intended meaning with the data owner.
REVENUE_CORRECTIONS = {100000: 9999, 1: 4999}
df['revenue'] = df['revenue'].replace(REVENUE_CORRECTIONS)

# NOTE(review): only `revenue` is corrected here; `final_price` and the
# earlier polars copy `df_pl` are not touched — verify that is intended.

# Row/column count must be unchanged by the substitution; the second number
# is the sum of the `payer` column.
print(df.shape, df['payer'].sum())

(1009, 19) 282


In [6]:
# Create the profiling report object for `df`.
# NOTE(review): the progress bars appear under the next cell, which suggests
# the heavy summarisation runs on first render rather than here — confirm.
profile = ProfileReport(
    df,
    title="Pandas Profiling Report",
)

In [7]:
# Render the profiling report inline as interactive notebook widgets.
profile.to_widgets()

Summarize dataset: 100%|█████████████| 78/78 [00:05<00:00, 15.15it/s, Completed]
Generate report structure: 100%|██████████████████| 1/1 [00:04<00:00,  4.55s/it]
Render widgets:   0%|                                     | 0/1 [00:00<?, ?it/s]

Unnamed: 0,channel,day,device,final_price,hour_of_day,month,payer,payment_type,promo_code,region,revenue,sessiondurationsec,time_of_day,user_id,week
channel,1.0,0.040553,0.160184,0.097636,0.08611,0.086436,0.0,0.0,0.068009,0.402801,0.0,0.07389,0.063816,0.040297,0.077869
day,0.040553,1.0,0.0,-0.027826,0.001399,0.024576,0.031965,0.033739,0.097078,0.0,0.0,0.002711,0.032958,0.001042,-0.014283
device,0.160184,0.0,1.0,0.0,0.042781,0.0,0.0,0.133032,0.0,0.201424,0.0,0.048311,0.008002,0.0,0.02684
final_price,0.097636,-0.027826,0.0,1.0,-0.00047,-0.08962,1.0,0.0,0.076915,0.118138,0.280114,-0.031347,0.0,0.25568,-0.084101
hour_of_day,0.08611,0.001399,0.042781,-0.00047,1.0,-0.01036,0.078211,0.0,0.0,0.0,0.059624,-0.034125,0.864193,-0.008714,-0.013412
month,0.086436,0.024576,0.0,-0.08962,-0.01036,1.0,0.120856,0.0,0.08651,0.0,0.096449,0.026261,0.0,0.07195,0.984861
payer,0.0,0.031965,0.0,1.0,0.078211,0.120856,1.0,1.0,1.0,0.0,0.999006,0.019701,0.0,0.0,0.150233
payment_type,0.0,0.033739,0.133032,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.096624,0.007546,0.0,0.0,0.0
promo_code,0.068009,0.097078,0.0,0.076915,0.0,0.08651,1.0,0.0,1.0,0.0,0.0,0.141434,0.129124,0.610746,0.127268
region,0.402801,0.0,0.201424,0.118138,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.042393,0.0,0.049944,0.0


Unnamed: 0,user_id,region,device,channel,session_start,session_end,sessiondurationsec,session_date,month,day,hour_of_day,order_dt,revenue,payment_type,promo_code,final_price,time_of_day,payer,week
0,529697267522,United States,iPhone,социальные сети,2019-05-01 00:06:40,2019-05-01 00:07:06,26,2019-05-01,5,3,0,2019-05-01 00:06:40,9999,Mobile payments,0.0,9999.0,night,1,18
1,601292388085,United States,PC,organic,2019-05-01 06:56:16,2019-05-01 07:09:18,782,2019-05-01,5,3,7,NaT,0,,,,morning,0,18
2,852898876338,United States,Mac,социальные сети,2019-05-01 04:30:45,2019-05-01 04:34:56,251,2019-05-01,5,3,4,NaT,0,,,,night,0,18
3,998513020664,United States,iPhone,социальные сети,2019-05-01 18:53:42,2019-05-01 18:57:35,233,2019-05-01,5,3,18,NaT,0,,,,evening,0,18
4,240702200943,United States,Mac,социальные сети,2019-05-02 14:04:32,2019-05-02 14:09:51,319,2019-05-02,5,4,14,NaT,0,,,,day,0,18
5,271758921583,United States,iPhone,социальные сети,2019-05-02 08:40:35,2019-05-02 08:41:15,40,2019-05-02,5,4,8,NaT,0,,,,morning,0,18
6,425357155257,United States,Android,социальные сети,2019-05-02 16:32:29,2019-05-02 16:55:30,1381,2019-05-02,5,4,16,NaT,0,,,,day,0,18
7,768721432035,France,Mac,реклама у блогеров,2019-05-02 10:21:30,2019-05-02 10:51:54,1824,2019-05-02,5,4,10,NaT,0,,,,day,0,18
8,177410811849,United States,Android,социальные сети,2019-05-03 18:43:49,2019-05-03 19:30:52,2823,2019-05-03,5,5,19,NaT,0,,,,evening,0,18
9,337713702816,United States,iPhone,реклама у блогеров,2019-05-03 19:48:35,2019-05-03 20:16:10,1655,2019-05-03,5,5,20,NaT,0,,,,evening,0,18




VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [8]:
# Export the profiling report to an HTML file. The path is relative, so the
# file is written to the kernel's current working directory.
profile.to_file("your_report.html")

Render HTML: 100%|████████████████████████████████| 1/1 [00:01<00:00,  1.08s/it]
Export report to file: 100%|█████████████████████| 1/1 [00:00<00:00, 164.93it/s]
