In [1]:
# libraries
import csv
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

## Coupon Recommendation data set

In [2]:
# destination: No Urgent Place, Home, Work
# passenger: Alone, Friend(s), Kid(s), Partner (who are the passengers in the car)
# weather: Sunny, Rainy, Snowy
# temperature:55, 80, 30
# time: 2PM, 10AM, 6PM, 7AM, 10PM
# coupon: Restaurant(<$20), Coffee House, Carry out & Take away, Bar, Restaurant($20-$50)
# expiration: 1d, 2h (the coupon expires in 1 day or in 2 hours)
# gender: Female, Male
# age: 21, 46, 26, 31, 41, 50plus, 36, below21
# maritalStatus: Unmarried partner, Single, Married partner, Divorced, Widowed
# has_children: 1, 0
# education: Some college - no degree, Bachelors degree, Associates degree, High School Graduate, Graduate degree (Masters or Doctorate), Some High School
# occupation: Unemployed, Architecture & Engineering, Student,
# Education&Training&Library, Healthcare Support,
# Healthcare Practitioners & Technical, Sales & Related, Management,
# Arts Design Entertainment Sports & Media, Computer & Mathematical,
# Life Physical Social Science, Personal Care & Service,
# Community & Social Services, Office & Administrative Support,
# Construction & Extraction, Legal, Retired,
# Installation Maintenance & Repair, Transportation & Material Moving,
# Business & Financial, Protective Service,
# Food Preparation & Serving Related, Production Occupations,
# Building & Grounds Cleaning & Maintenance, Farming Fishing & Forestry
# income: $37500 - $49999, $62500 - $74999, $12500 - $24999, $75000 - $87499,
# $50000 - $62499, $25000 - $37499, $100000 or More, $87500 - $99999, Less than $12500
# Bar: never, less1, 1~3, gt8, nan, 4~8 (feature meaning: how many times do you go to a bar every month?)
# CoffeeHouse: never, less1, 4~8, 1~3, gt8, nan (feature meaning: how many times do you go to a coffeehouse every month?)
# CarryAway: 4~8, 1~3, gt8, less1, never (feature meaning: how many times do you get take-away food every month?)
# RestaurantLessThan20: 4~8, 1~3, less1, gt8, never (feature meaning: how many times do you go to a restaurant with an average expense per person of less than $20 every month?)
# Restaurant20To50: 1~3, less1, never, gt8, 4~8, nan (feature meaning: how many times do you go to a restaurant with average expense per person of $20 - $50 every month?)
# toCoupon_GEQ15min:0,1 (feature meaning: driving distance to the restaurant/bar for using the coupon is greater than 15 minutes)
# toCoupon_GEQ25min:0, 1 (feature meaning: driving distance to the restaurant/bar for using the coupon is greater than 25 minutes)
# direction_same:0, 1 (feature meaning: whether the restaurant/bar is in the same direction as your current destination)
# direction_opp:1, 0 (feature meaning: whether the restaurant/bar is in the opposite direction from your current destination)
# Y:1, 0 (whether the coupon is accepted)

In [3]:
# Load the raw coupon-recommendation data from the CSV next to the notebook.
df = pd.read_csv('coupon-recommendation.csv', sep=',')

# Column names applied positionally to the 26 columns of the file.
# Every name must be free of stray whitespace: a label such as
# ' Restaurant20To50' (leading space) would make df['Restaurant20To50']
# raise a KeyError later on.
cols = ['destination', 'passenger', 'weather', 'temperature', 'time', 'coupon', 'expiration', 'gender', 'age',
        'maritalStatus', 'has_children', 'education', 'occupation', 'income', 'car', 'Bar', 'CoffeeHouse',
        'CarryAway', 'RestaurantLessThan20', 'Restaurant20To50', 'toCoupon_GEQ5min', 'toCoupon_GEQ15min',
        'toCoupon_GEQ25min', 'direction_same', 'direction_opp', 'Y']

# pandas raises ValueError here if the file does not have exactly len(cols) columns.
df.columns = cols

# Quick sanity check: dtypes / non-null counts, then the first rows.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12684 entries, 0 to 12683
Data columns (total 26 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   destination           12684 non-null  object
 1   passenger             12684 non-null  object
 2   weather               12684 non-null  object
 3   temperature           12684 non-null  int64 
 4   time                  12684 non-null  object
 5   coupon                12684 non-null  object
 6   expiration            12684 non-null  object
 7   gender                12684 non-null  object
 8   age                   12684 non-null  object
 9   maritalStatus         12684 non-null  object
 10  has_children          12684 non-null  int64 
 11  education             12684 non-null  object
 12  occupation            12684 non-null  object
 13  income                12684 non-null  object
 14  car                   108 non-null    object
 15  Bar                   12577 non-null

Unnamed: 0,destination,passenger,weather,temperature,time,coupon,expiration,gender,age,maritalStatus,...,CoffeeHouse,CarryAway,RestaurantLessThan20,Restaurant20To50,toCoupon_GEQ5min,toCoupon_GEQ15min,toCoupon_GEQ25min,direction_same,direction_opp,Y
0,No Urgent Place,Alone,Sunny,55,2PM,Restaurant(<20),1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,1
1,No Urgent Place,Friend(s),Sunny,80,10AM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,0,0,0,1,0
2,No Urgent Place,Friend(s),Sunny,80,10AM,Carry out & Take away,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,1
3,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,2h,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0
4,No Urgent Place,Friend(s),Sunny,80,2PM,Coffee House,1d,Female,21,Unmarried partner,...,never,,4~8,1~3,1,1,0,0,1,0


In [9]:
# One-hot encode the nominal categorical features and swap them into the frame.
# NOTE(review): the saved output of this cell is "KeyError: 'destination'" and its
# execution count is out of order -- presumably `df` had already been altered when
# it was run. Re-run from a fresh kernel, after the cell that loads `df`.
onehot_cols = ["destination", "passenger", "weather", "gender"]

# One indicator frame per feature; columns are named "<feature>_<category>".
destination, passenger, weather, gender = (
    pd.get_dummies(df[col], prefix=col) for col in onehot_cols
)

new_df = df.join([destination, passenger, weather, gender])

# Drop every raw column that was just encoded. "gender" used to be missing from
# this list, which left the original string column next to its indicator columns.
drop_cols = list(onehot_cols)
new_df = new_df.drop(columns=drop_cols)  # `columns=` already selects the axis
new_df.columns


KeyError: 'destination'

In [4]:
# Separate the predictors from the target: "Y" (whether the coupon is accepted)
# is the label and every other column is a feature. No train/test split is
# performed in this cell.

all_columns = list(df.columns)
acceptance_index = all_columns.index("Y")  # position of the target column
label = all_columns[acceptance_index]

# Every column except the one sitting at the target position.
train_cols = all_columns[:acceptance_index] + all_columns[acceptance_index + 1:]

X_df = df[train_cols]  # feature columns
y_df = df[label]       # target column