# Binomial Model

In [None]:
#!pip install pandas
#!pip install sqlite3  # not needed: sqlite3 is part of the Python standard library and cannot be installed with pip
#!pip install pandasql
#!pip install statsmodels

In [1]:
import os
import sqlite3

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Location of the FarmersMarket SQLite database. Set the FARMERS_MARKET_DB
# environment variable to point at your own copy (use the path separator of
# your operating system); when it is unset, the original hard-coded path is used.
DB = os.environ.get(
    'FARMERS_MARKET_DB',
    '/Users/thomas/Documents/GitHub/02-intro_sql/SQL/FarmersMarket.db',
)
# sqlite3.connect() creates a new, empty database when the file does not exist,
# which would only surface later as a confusing "no such table" error in the
# query cell. Fail here with a clear message instead.
if not os.path.isfile(DB):
    raise FileNotFoundError(f"SQLite database not found: {DB}")
# Establish the connection.
#   isolation_level=None      -> autocommit mode (no implicit transactions)
#   detect_types=PARSE_COLNAMES -> type converters are selected from column
#                                  aliases of the form 'name [type]'
conn = sqlite3.connect(DB, isolation_level=None,
                       detect_types=sqlite3.PARSE_COLNAMES)

In [3]:
# Run the query and load the result into a DataFrame with one row per
# (customer_id, market_date) visit:
#   - purchase_total: SUM(quantity * cost_to_customer_per_qty) for the visit
#   - vendors_patronized / different_products_purchased: distinct vendor and
#     product counts for the visit
#   - purchased_again_within_7_days: 1 when the customer's next market_date in
#     customer_purchases is at most 7 days after this one, otherwise 0. Visits
#     with no later market_date also get 0, because the comparison evaluates
#     to NULL and the CASE falls through to ELSE.
# The customer_markets_attended CTE lists the distinct (customer_id, market_date)
# pairs; the correlated subquery searches it for that next date.
# The trailing "\" inside the string literal continues the literal onto the next
# source line (no newline is inserted into the SQL text); nothing, not even a
# space, may follow the "\" on a line.
db_df = pd.read_sql_query("WITH \
    customer_markets_attended AS \
    ( \
      SELECT DISTINCT \
      customer_id, \
      market_date \
      FROM customer_purchases \
      ORDER BY customer_id, market_date \
    ) \
    SELECT \
    cp.market_date, \
    cp.customer_id, \
    SUM(cp.quantity * cp.cost_to_customer_per_qty) AS purchase_total, \
    COUNT(DISTINCT cp.vendor_id) AS vendors_patronized, \
    COUNT(DISTINCT cp.product_id) AS different_products_purchased, \
    CASE WHEN \
    CAST( \
      JULIANDAY((SELECT MIN(cma3.market_date) \
                 FROM customer_markets_attended AS cma3 \
                 WHERE cma3.customer_id = cp.customer_id \
                 AND cma3.market_date > cp.market_date \
                 GROUP BY cma3.customer_id)) -  \
        JULIANDAY(cp.market_date) AS INTEGER) <= 7 \
    THEN 1 \
    ELSE 0 END AS purchased_again_within_7_days \
    FROM customer_purchases AS cp \
    GROUP BY cp.customer_id, cp.market_date \
    ORDER BY cp.customer_id, cp.market_date"
                          ,conn)

In [4]:
# Modelling frame: the query result without its market_date and customer_id columns.
bi_df = db_df.drop(columns=['market_date', 'customer_id'])

In [5]:
# Non-null row counts of every remaining column, per value of the 0/1 target.
bi_df.groupby('purchased_again_within_7_days').count()

Unnamed: 0_level_0,purchase_total,vendors_patronized,different_products_purchased
purchased_again_within_7_days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,378,378,378
1,1640,1640,1640


In [6]:
# Patsy-style model formula: binary target on the left of "~", the three
# per-visit predictors on the right.
formula = (
    "purchased_again_within_7_days ~ "
    "purchase_total + vendors_patronized + different_products_purchased"
)

In [7]:
# Fit a GLM with a Binomial family on the modelling frame, using the formula
# defined above; `binomial` holds the fitted results object.
binomial = smf.glm(
    formula=formula,
    data=bi_df,
    family=sm.families.Binomial(),
).fit()

In [8]:
# Show the fitted model's summary table as plain text.
print(binomial.summary().as_text())

                       Generalized Linear Model Regression Results                       
Dep. Variable:     purchased_again_within_7_days   No. Observations:                 2018
Model:                                       GLM   Df Residuals:                     2014
Model Family:                           Binomial   Df Model:                            3
Link Function:                             Logit   Scale:                          1.0000
Method:                                     IRLS   Log-Likelihood:                -957.07
Date:                           Mon, 23 Jan 2023   Deviance:                       1914.1
Time:                                   19:39:48   Pearson chi2:                 2.03e+03
No. Iterations:                                4   Pseudo R-squ. (CS):            0.01595
Covariance Type:                       nonrobust                                         
                                   coef    std err          z      P>|z|      [0.025      0.975]
---