## Merging different datasets

In [1]:
# Importing libraries
import pandas as pd

In [2]:
# Load the three exported CSV tables into DataFrames (read in the order listed).
cust_dem, trans, cust_add = (
    pd.read_csv(csv_path)
    for csv_path in (
        r"Exported/customer_demographic.csv",  # customer demographic
        r"Exported/transactions.csv",          # transactions
        r"Exported/customer_address.csv",      # customer address
    )
)

### __`Let's first combine Customer Demographic with Customer Address`__

In [3]:
# Preview the first five rows of the customer demographic table.
cust_dem.head(n=5)

Unnamed: 0,customer_id,first_name,last_name,gender,past_3_years_bike_related_purchases,DOB,age,job_title,job_industry_category,wealth_segment,owns_car,tenure
0,1,Laraine,Medendorp,F,93,1953-10-12,67,Executive Secretary,Health,Mass Customer,Yes,11.0
1,2,Eli,Bockman,M,81,1980-12-16,40,Administrative Officer,Financial Services,Mass Customer,Yes,16.0
2,3,Arlin,Dearle,M,61,1954-01-20,66,Recruiting Manager,Property,Mass Customer,Yes,15.0
3,4,Talbot,,M,33,1961-10-03,59,Untitled,IT,Mass Customer,No,7.0
4,5,Sheila-kathryn,Calton,F,56,1977-05-13,43,Senior Editor,Untitled,Affluent Customer,Yes,8.0


In [4]:
# Preview the first five rows of the customer address table.
cust_add.head(n=5)

Unnamed: 0,customer_id,address,postcode,state,property_valuation
0,1,060 Morning Avenue,2016,NSW,10
1,2,6 Meadow Vale Court,2153,NSW,10
2,4,0 Holy Cross Court,4211,QLD,9
3,5,17979 Del Mar Point,2448,NSW,4
4,6,9 Oakridge Court,3216,VIC,9


### - ___**`We can see that both datasets have a customer_id column, so we can merge (join) the two datasets on that column`**___

In [5]:
# Join the customer demographic and customer address tables on the shared
# customer_id key to form one customer-level table.
# - how="inner" is the pandas default, spelled out here: only customers present
#   in BOTH tables are kept, so a customer without an address row is dropped.
# - validate="one_to_one" makes pandas raise MergeError if customer_id is
#   duplicated in either table, instead of silently multiplying rows.
cust_data = pd.merge(
    cust_dem,
    cust_add,
    how="inner",
    on="customer_id",
    validate="one_to_one",
)

In [6]:
# Preview the merged table: demographic columns followed by address columns.
cust_data.head(n=5)

Unnamed: 0,customer_id,first_name,last_name,gender,past_3_years_bike_related_purchases,DOB,age,job_title,job_industry_category,wealth_segment,owns_car,tenure,address,postcode,state,property_valuation
0,1,Laraine,Medendorp,F,93,1953-10-12,67,Executive Secretary,Health,Mass Customer,Yes,11.0,060 Morning Avenue,2016,NSW,10
1,2,Eli,Bockman,M,81,1980-12-16,40,Administrative Officer,Financial Services,Mass Customer,Yes,16.0,6 Meadow Vale Court,2153,NSW,10
2,4,Talbot,,M,33,1961-10-03,59,Untitled,IT,Mass Customer,No,7.0,0 Holy Cross Court,4211,QLD,9
3,5,Sheila-kathryn,Calton,F,56,1977-05-13,43,Senior Editor,Untitled,Affluent Customer,Yes,8.0,17979 Del Mar Point,2448,NSW,4
4,6,Curr,Duckhouse,M,35,1966-09-16,54,Untitled,Retail,High Net Worth,Yes,13.0,9 Oakridge Court,3216,VIC,9


### __`Now we will combine the complete Customer Dataset with the Transaction Dataset`__

In [7]:
# Raise pandas' column display limit to 30 so the wide tables below are
# rendered with every column instead of being truncated.
pd.set_option("display.max_columns", 30)

In [8]:
# Preview the first five rows of the transactions table.
trans.head(n=5)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost,profit,product_first_sold_date
0,1,2,2950,2017-02-25,0.0,Approved,Solex,Standard,medium,medium,71.49,53.62,17.87,2012-12-02
1,2,3,3120,2017-05-21,1.0,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92,1702.55,2014-03-03
2,3,37,402,2017-10-16,0.0,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82,1544.61,1999-07-20
3,4,88,3135,2017-08-31,0.0,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.1,817.36,1998-12-16
4,5,78,787,2017-10-01,1.0,Approved,Giant Bicycles,Standard,medium,large,1765.3,709.48,1055.82,2015-08-10


In [9]:
# Attach each transaction's customer details (demographics + address) by joining
# on customer_id, giving one row per transaction.
# - how="inner" is the pandas default, spelled out here: transactions whose
#   customer_id is missing from cust_data (and customers with no transactions)
#   are dropped.
# - validate="many_to_one" allows many transactions per customer but makes
#   pandas raise MergeError if customer_id is duplicated in cust_data, which
#   would otherwise silently duplicate transaction rows.
customer = pd.merge(
    trans,
    cust_data,
    how="inner",
    on="customer_id",
    validate="many_to_one",
)

In [10]:
# Preview the combined table: transaction columns followed by customer columns.
customer.head(n=5)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost,profit,product_first_sold_date,first_name,last_name,gender,past_3_years_bike_related_purchases,DOB,age,job_title,job_industry_category,wealth_segment,owns_car,tenure,address,postcode,state,property_valuation
0,1,2,2950,2017-02-25,0.0,Approved,Solex,Standard,medium,medium,71.49,53.62,17.87,2012-12-02,Kristos,Anthony,M,19,1955-01-11,65,Software Engineer I,Financial Services,Mass Customer,Yes,10.0,984 Hoepker Court,3064,VIC,6
1,11065,1,2950,2017-10-16,0.0,Approved,Giant Bicycles,Standard,medium,medium,1403.5,954.82,448.68,2003-02-07,Kristos,Anthony,M,19,1955-01-11,65,Software Engineer I,Financial Services,Mass Customer,Yes,10.0,984 Hoepker Court,3064,VIC,6
2,18923,62,2950,2017-04-26,0.0,Approved,Solex,Standard,medium,medium,478.16,298.72,179.44,2010-11-05,Kristos,Anthony,M,19,1955-01-11,65,Software Engineer I,Financial Services,Mass Customer,Yes,10.0,984 Hoepker Court,3064,VIC,6
3,2,3,3120,2017-05-21,1.0,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92,1702.55,2014-03-03,Lauree,O'Donnell,F,89,1979-02-04,41,Clinical Specialist,Health,Mass Customer,Yes,10.0,4 Shopko Circle,2196,NSW,5
4,6862,4,3120,2017-10-05,0.0,Approved,Giant Bicycles,Standard,high,medium,1129.13,677.48,451.65,2011-04-16,Lauree,O'Donnell,F,89,1979-02-04,41,Clinical Specialist,Health,Mass Customer,Yes,10.0,4 Shopko Circle,2196,NSW,5


### This dataset now combines each customer's demographic, address and transaction data (customers missing from any of the three tables are excluded by the inner joins).

### We will now export this final dataset

In [11]:
# Write the combined table to CSV; index=False leaves the row index out of the file.
customer.to_csv(
    path_or_buf=r"Exported/all_customers_data.csv",
    index=False,
)