<a href="https://colab.research.google.com/github/ihsanmujahid/Codecademy_IHSAN/blob/Website-Funnel-Analysis/Website_Funnel_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

def funnel_dropoff(previous_step, next_step, time_column):
    """Measure how many rows of one funnel step never reach the next step.

    ``previous_step`` is left-merged with ``next_step`` on ``user_id`` so that
    every row of the earlier step is kept; rows with no match in the later
    step get a null in ``time_column``.

    Args:
        previous_step: DataFrame for the earlier step; must have ``user_id``.
        next_step: DataFrame for the later step; must have ``user_id`` and
            ``time_column``.
        time_column: Name of the timestamp column contributed by
            ``next_step`` (for example ``'cart_time'``).

    Returns:
        A ``(merged, null_count, percent)`` tuple: the merged DataFrame, the
        number of rows whose ``time_column`` is null, and that count as a
        percentage of the merged rows (``0.0`` when the merge is empty).
    """
    merged = previous_step.merge(next_step, how='left', on='user_id')
    null_count = merged[time_column].isnull().sum()
    total = len(merged)
    # Guard the empty case so an empty step reports 0% rather than NaN.
    percent = (null_count / total) * 100 if total else 0.0
    return merged, null_count, percent


def weakest_funnel_step(dropoff_by_column):
    """Return the key with the highest drop-off percentage.

    Args:
        dropoff_by_column: Mapping of a step's timestamp column name to the
            percentage of users lost when entering that step.

    Returns:
        The column name whose percentage is largest (first one on ties).
    """
    return max(dropoff_by_column, key=dropoff_by_column.get)


# In a notebook cell or a script run ``__name__`` is "__main__", so every
# name assigned below is still a module-level global, as before.
if __name__ == "__main__":
    # Column index 1 of each file holds that step's timestamp.
    visits = pd.read_csv('visits.csv',
                         parse_dates=[1])
    cart = pd.read_csv('cart.csv',
                       parse_dates=[1])
    checkout = pd.read_csv('checkout.csv',
                           parse_dates=[1])
    purchase = pd.read_csv('purchase.csv',
                           parse_dates=[1])

    # Step 1: Inspect the DataFrames
    print("Visits:")
    print(visits.head())

    print("Cart:")
    print(cart.head())

    print("Checkout:")
    print(checkout.head())

    print("Purchase:")
    print(purchase.head())

    # Steps 2-5: visits -> cart. Left merge keeps every visit; a null
    # 'cart_time' marks a visitor who never added a t-shirt to the cart.
    merged_data, null_cart_time, percent_no_cart = funnel_dropoff(
        visits, cart, 'cart_time')
    merged_length = len(merged_data)
    print("Length of merged DataFrame:", merged_length)
    print("Number of null timestamps in 'cart_time':", null_cart_time)
    print("Percentage of users not placing a t-shirt in their cart:", percent_no_cart)

    # Step 6: cart -> checkout. The population is users who carted, so merge
    # from `cart` (not from the visits merge) and divide by that merge's own
    # length.
    cart_checkout, null_checkout_time, percent_no_checkout = funnel_dropoff(
        cart, checkout, 'checkout_time')
    print("Percentage of users not proceeding to checkout after adding items to cart:", percent_no_checkout)

    # Step 7: Merge all four steps of the funnel
    merged_data = merged_data.merge(checkout, how='left', on='user_id')
    all_data = merged_data.merge(purchase, how='left', on='user_id')
    print("Merged funnel data:")
    print(all_data.head())

    # Step 8: checkout -> purchase. The population is users who reached
    # checkout, so merge from `checkout`.
    checkout_purchase, null_purchase_time, percent_no_purchase = funnel_dropoff(
        checkout, purchase, 'purchase_time')
    print("Percentage of users not purchasing a t-shirt after proceeding to checkout:", percent_no_purchase)

    # Step 9: Identify the weakest step of the funnel by comparing the
    # per-step drop-off rates. Null fractions in the cumulative `all_data`
    # merge accumulate down the funnel, so they cannot be used for this.
    weakest_step = weakest_funnel_step({
        'cart_time': percent_no_cart,
        'checkout_time': percent_no_checkout,
        'purchase_time': percent_no_purchase,
    })
    print("Weakest step of the funnel:", weakest_step)

    # Step 10: Compute, per row, the time from initial visit to final
    # purchase (null where there was no purchase)
    all_data['time_to_purchase'] = all_data['purchase_time'] - all_data['visit_time']

    # Step 11: Examine the results by printing the new column
    print(all_data['time_to_purchase'])

    # Step 12: Calculate the average time to purchase
    average_time_to_purchase = all_data['time_to_purchase'].mean()
    print("Average time to purchase:", average_time_to_purchase)



Visits:
                                user_id          visit_time
0  943647ef-3682-4750-a2e1-918ba6f16188 2017-04-07 15:14:00
1  0c3a3dd0-fb64-4eac-bf84-ba069ce409f2 2017-01-26 14:24:00
2  6e0b2d60-4027-4d9a-babd-0e7d40859fb1 2017-08-20 08:23:00
3  6879527e-c5a6-4d14-b2da-50b85212b0ab 2017-11-04 18:15:00
4  a84327ff-5daa-4ba1-b789-d5b4caf81e96 2017-02-27 11:25:00
Cart:
                                user_id           cart_time
0  2be90e7c-9cca-44e0-bcc5-124b945ff168 2017-11-07 20:45:00
1  4397f73f-1da3-4ab3-91af-762792e25973 2017-05-27 01:35:00
2  a9db3d4b-0a0a-4398-a55a-ebb2c7adf663 2017-03-04 10:38:00
3  b594862a-36c5-47d5-b818-6e9512b939b3 2017-09-27 08:22:00
4  a68a16e2-94f0-4ce8-8ce3-784af0bbb974 2017-07-26 15:48:00
Checkout:
                                user_id       checkout_time
0  d33bdc47-4afa-45bc-b4e4-dbe948e34c0d 2017-06-25 09:29:00
1  4ac186f0-9954-4fea-8a27-c081e428e34e 2017-04-07 20:11:00
2  3c9c78a7-124a-4b77-8d2e-e1926e011e7d 2017-07-13 11:38:00
3  89fe330a-8966