# Predictive Analytics for Cross-Sell Opportunities

## Step 1: Data Exploration and Analysis

In this step, we'll explore the dataset to understand customer behavior, product usage patterns, and potential cross-sell opportunities.

In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: apply the matplotlib style sheet first, then the seaborn
# palette, so the palette is layered on top of the style's defaults.
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# pandas display limits, set in one call as (option, value) pairs:
# no cap on displayed columns, and at most 100 displayed rows.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', 100,
)

In [5]:
# Read the raw CSV into a DataFrame (path is relative to the notebook's
# working directory).
df = pd.read_csv('data/main_data.csv')

# Report the frame's dimensions, followed by the per-column
# non-null count / dtype summary that DataFrame.info() prints.
print("Dataset Shape:", df.shape)
print("\nDataset Info:")
df.info()

# Preview the leading rows; as the cell's last expression the
# returned frame is rendered as the cell output.
print("\nFirst few rows:")
df.head(n=5)

Dataset Shape: (2100, 48)

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2100 entries, 0 to 2099
Data columns (total 48 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   hhid                2100 non-null   int64  
 1   bankid              2100 non-null   int64  
 2   year                2100 non-null   int64  
 3   muniid              2100 non-null   int64  
 4   salaryquarter       2100 non-null   int64  
 5   educ                2100 non-null   int64  
 6   agebingroup         2100 non-null   int64  
 7   depspread           2100 non-null   float64
 8   ln_debt             2100 non-null   float64
 9   hashome2            2100 non-null   int64  
 10  ln_housevalue2      2100 non-null   float64
 11  has_unempl          2100 non-null   int64  
 12  ever_default        2100 non-null   int64  
 13  prate               2100 non-null   float64
 14  rprate              2100 non-null   float64
 15  depositvol    

Unnamed: 0,hhid,bankid,year,muniid,salaryquarter,educ,agebingroup,depspread,ln_debt,hashome2,ln_housevalue2,has_unempl,ever_default,prate,rprate,depositvol,debt,number_years_relid,real_assets,unemployed,deprate,ave_dep,gjeldsrenter,loanvol,hhi_deposit,hhi_loan,w_hhi_dep,w_hhi_loans,w_mshare_dep,w_mshare_loans,birthyear,pop_density,hashome3,loanrate,loanspread_on,parent,retired,ln_inc,wealth,total_income,gross_wealth,ln_wealth,ln_deposits,old_mshare_deposit,mrkt_share_deposit,sameplace,withinmovers,acrossmovers
0,1535,9,1,8,4,2,5,2.761294,10.549853,0,11.22718,0,0,4.10306,4.10306,64654.123407,49283.417687,14,86778.944083,0,2.780835,3463.735027,76.64405,966.054109,0.054414,0.639877,0.460566,0.791456,0.403745,0.878635,1979,5148.153389,0,5.973824,0.931138,1,0,8.786731,84976.608922,44574.239853,84976.608922,11.350131,11.076807,0.227578,0.242738,0,0,0
1,1535,9,2,4,4,1,5,3.255135,11.028943,1,11.226316,1,0,5.929801,5.929801,77730.048998,17822.087189,14,42132.369796,0,2.290667,5089.467848,5.387345,2015.125562,0.335763,0.105918,0.027696,0.894472,0.975192,0.3415,1991,4517.356903,1,3.925706,3.595868,0,0,9.163624,34281.979137,48894.652454,34281.979137,10.442375,11.260997,0.450172,0.837361,0,1,1
2,1535,9,3,7,2,1,1,0.019451,10.975316,0,8.420039,0,0,9.858001,9.858001,9426.361124,93684.684481,14,40639.186855,0,4.394705,1551.335113,10.519545,5468.855269,0.709699,0.005128,0.847915,0.061251,0.206893,0.97699,1966,2012.424519,0,5.94558,0.393652,1,0,8.772132,9604.676349,32093.632405,9604.676349,9.170005,9.151265,0.898697,0.256768,1,1,0
3,1535,9,4,2,2,1,1,1.199785,11.162839,1,9.970744,0,1,3.747845,3.747845,55688.769912,50027.626871,14,40820.004994,1,9.64203,9374.124017,68.953273,262.618638,0.226027,0.467856,0.769788,0.315097,0.509299,0.276146,1953,4032.845507,0,5.253453,0.235037,1,0,7.56523,62127.827146,66001.910541,62127.827146,11.036949,10.927534,0.049735,0.339421,0,1,1
4,1535,9,5,9,1,3,2,4.094816,11.483522,0,10.743217,0,1,4.29892,4.29892,77323.189324,26624.921535,14,73457.929736,0,9.274225,1451.049145,75.299617,5278.634194,0.427294,0.92273,0.088323,0.106654,0.656955,0.079454,1951,3835.744929,1,9.601078,7.381039,0,0,8.022332,10904.323616,35829.299014,10904.323616,9.296915,11.255749,0.727677,0.868969,1,0,0
