In [1]:
import sqlite3 
from sqlite3 import Error 
import pandas as pd

In [2]:
def create_connection(path):
    """Open a SQLite connection to the database file at ``path``.

    Prints a confirmation message when the connection is established.
    If ``sqlite3`` raises an ``Error`` while connecting, the error text is
    printed instead and ``None`` is returned.

    Args:
        path: Location of the SQLite database, passed to ``sqlite3.connect``.

    Returns:
        The open connection object, or ``None`` if connecting failed.
    """
    try:
        db_handle = sqlite3.connect(path)
    except Error as e:
        # Report the failure and hand back None rather than raising.
        print(f"The error '{e}' occurred")
        return None

    print("Connection to SQLite DB successful")
    return db_handle

In [24]:
# Shared connection used by every to_sql / read_sql call in this notebook.
connection = create_connection(path="SQL_marketing.db")

Connection to SQLite DB successful


In [25]:
# The campaign export is semicolon-delimited, so the separator is explicit.
df = pd.read_csv(
    'marketing_campaign.csv',
    sep=';',
)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,2012-09-04,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,2014-03-08,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,2013-08-21,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,2014-02-10,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,2014-01-19,94,173,...,5,0,0,0,0,0,0,3,11,0


In [26]:
# Store the campaign frame as a table; an existing table of the same name is
# replaced and the DataFrame index is not written as a column.
df.to_sql(
    name='marketing_campaign',
    con=connection,
    if_exists='replace',
    index=False,
)

2240

In [27]:
# Load the synthetic marketing dataset (default comma separator).
synthetic = pd.read_csv(
    'marketing_synthetic.csv',
)
# Preview the first three rows.
synthetic.head(n=3)

Unnamed: 0,id,target,day,month,duration,contactId,age,gender,job,maritalStatus,education,creditFailure,accountBalance,house,credit,contactType,numberOfContacts,daySinceLastCampaign,numberOfContactsLastCampaign,lastCampaignResult
0,432148809,no,27,may,166,623,30,female,worker,married,highSchool,no,-202,no,no,unknown,2,,0,unknown
1,432184318,no,26,oct,183,1992,42,female,manager,married,uniGraduated,no,2463,no,no,cellPhone,2,,0,unknown
2,432182482,no,5,jun,227,2778,26,female,services,single,highSchool,no,2158,yes,yes,landline,1,,0,unknown


In [28]:
# Summarize the synthetic frame: column names, non-null counts and dtypes.
synthetic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31480 entries, 0 to 31479
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            31480 non-null  int64  
 1   target                        31480 non-null  object 
 2   day                           31480 non-null  int64  
 3   month                         31480 non-null  object 
 4   duration                      31480 non-null  int64  
 5   contactId                     31480 non-null  int64  
 6   age                           31480 non-null  int64  
 7   gender                        31480 non-null  object 
 8   job                           31480 non-null  object 
 9   maritalStatus                 31480 non-null  object 
 10  education                     31480 non-null  object 
 11  creditFailure                 31480 non-null  object 
 12  accountBalance                31480 non-null  int64  
 13  h

In [29]:
# Store the synthetic frame as a table; an existing table of the same name is
# replaced and the DataFrame index is not written as a column.
synthetic.to_sql(
    name='synthetic',
    con=connection,
    if_exists='replace',
    index=False,
)

31480

## Оконные функции

In [30]:
# Pull a single row from the stored table just to read off its column names.
col = pd.read_sql(
    sql='SELECT * FROM marketing_campaign LIMIT 1',
    con=connection,
).columns
col

Index(['ID', 'Year_Birth', 'Education', 'Marital_Status', 'Income', 'Kidhome',
       'Teenhome', 'Dt_Customer', 'Recency', 'MntWines', 'MntFruits',
       'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
       'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
       'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth',
       'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
       'AcceptedCmp2', 'Complain', 'Z_CostContact', 'Z_Revenue', 'Response'],
      dtype='object')

In [43]:
# Window aggregates: every row keeps its ID and Income and is annotated with
# the SUM / AVG / MIN / MAX of MntFishProducts over all rows sharing the same
# Income value. Rows with a NULL Income are excluded.
fish_stats_by_income_sql = '''SELECT ID, Income, 
                    SUM(MntFishProducts) over (partition by Income) as sum_MntFishProducts, 
                    AVG(MntFishProducts) over (partition by Income) as mean_MntFishProducts,
                    MIN(MntFishProducts) over (partition by Income) as min_MntFishProducts,
                    MAX(MntFishProducts) over (partition by Income) as max_MntFishProducts
            FROM marketing_campaign
            WHERE Income is NOT NULL'''

pd.read_sql(sql=fish_stats_by_income_sql, con=connection)

Unnamed: 0,ID,Income,sum_MntFishProducts,mean_MntFishProducts,min_MntFishProducts,max_MntFishProducts
0,6862,1730.0,1,1.0,1,1
1,5376,2447.0,1,1.0,1,1
2,11110,3502.0,0,0.0,0,0
3,9931,4023.0,1,1.0,1,1
4,10311,4428.0,2,2.0,2,2
...,...,...,...,...,...,...
2211,8475,157243.0,1,1.0,1,1
2212,5336,157733.0,2,2.0,2,2
2213,1501,160803.0,17,17.0,17,17
2214,1503,162397.0,2,2.0,2,2


In [44]:
# Window aggregates: every row keeps its ID and Recency and is annotated with
# the SUM / AVG / MIN / MAX of MntFishProducts over all rows sharing the same
# Recency value.
# NOTE(review): the WHERE clause filters on Income although this query
# partitions by Recency -- confirm the filter is intended here.
fish_stats_by_recency_sql = '''SELECT ID, Recency, 
                    SUM(MntFishProducts) over (partition by Recency) as sum_MntFishProducts, 
                    AVG(MntFishProducts) over (partition by Recency) as mean_MntFishProducts,
                    MIN(MntFishProducts) over (partition by Recency) as min_MntFishProducts,
                    MAX(MntFishProducts) over (partition by Recency) as max_MntFishProducts
            FROM marketing_campaign
            WHERE Income is NOT NULL'''

pd.read_sql(sql=fish_stats_by_recency_sql, con=connection)

Unnamed: 0,ID,Recency,sum_MntFishProducts,mean_MntFishProducts,min_MntFishProducts,max_MntFishProducts
0,4047,0,737,26.321429,0,240
1,1386,0,737,26.321429,0,240
2,1473,0,737,26.321429,0,240
3,2795,0,737,26.321429,0,240
4,1826,0,737,26.321429,0,240
...,...,...,...,...,...,...
2211,528,99,905,53.235294,0,188
2212,7947,99,905,53.235294,0,188
2213,2831,99,905,53.235294,0,188
2214,2415,99,905,53.235294,0,188


In [50]:
# Rank all customers by MntFishProducts, highest first, using a named window.
# dense_rank() gives tied rows the same rank and leaves no gaps afterwards.
fish_dense_rank_sql = '''SELECT ID, dense_rank() over w as rank, Education, MntFishProducts
            FROM marketing_campaign
            window w as (order by MntFishProducts desc)
            order by rank'''

pd.read_sql(sql=fish_dense_rank_sql, con=connection)

Unnamed: 0,ID,rank,Education,MntFishProducts
0,7342,1,2n Cycle,259
1,3091,2,2n Cycle,258
2,4676,2,Master,258
3,762,2,2n Cycle,258
4,0,3,Graduation,254
...,...,...,...,...
2235,6168,182,Graduation,0
2236,692,182,Graduation,0
2237,4838,182,Graduation,0
2238,1448,182,Master,0


In [52]:
# Same ranking as the global one, but restarted inside each Education group:
# the named window partitions by Education and orders by MntFishProducts
# descending; the result is sorted by Education, then rank.
fish_dense_rank_by_education_sql = '''SELECT ID, dense_rank() over w as rank, Education, MntFishProducts
            FROM marketing_campaign
            window w as (partition by Education
                        order by MntFishProducts desc)
            order by Education, rank'''

pd.read_sql(sql=fish_dense_rank_by_education_sql, con=connection)

Unnamed: 0,ID,rank,Education,MntFishProducts
0,7342,1,2n Cycle,259
1,3091,2,2n Cycle,258
2,762,2,2n Cycle,258
3,1513,3,2n Cycle,223
4,10648,4,2n Cycle,220
...,...,...,...,...
2235,1612,88,PhD,0
2236,1631,88,PhD,0
2237,1818,88,PhD,0
2238,8858,88,PhD,0
