In [53]:
import pandas as pd
import numpy as np

In [54]:
def clean_currency_and_convert_to_float(series):
    """
    Strip rupee signs and thousands separators from a Series and return floats.

    Each value is rendered as text, every '₹' and ',' is removed, surrounding
    whitespace is trimmed, and the result is parsed as a number. Values that
    still cannot be parsed (including missing values) become NaN.

    Args:
        series (pd.Series): The Pandas Series to clean.

    Returns:
        pd.Series: A float64 Series with the same index as the input.
    """
    # Remove '₹' and commas, then trim whitespace left around the number.
    cleaned_series = series.astype(str).str.replace(r'[₹,]', '', regex=True).str.strip()
    # pd.to_numeric picks int64 when every value is a whole number; cast explicitly
    # so the result is always float, as the function name and docstring promise.
    numeric_series = pd.to_numeric(cleaned_series, errors='coerce').astype('float64')
    return numeric_series



In [55]:
# 1. Data Loading and Initial Exploration
# Read the CSV into shark_tank_data, the frame that the later cells operate on.
try:
    shark_tank_data = pd.read_csv('Shark Tank India.csv')
    print("Data loaded successfully.")
except FileNotFoundError:
    print("Error: 'Shark Tank India.csv' not found.  Make sure the file is in the correct directory and named correctly.")
    # Re-raise rather than calling exit(): exit() is an interactive-shell helper that
    # ends the session, whereas re-raising fails this cell with a traceback and
    # stops a "Run All" before any cell tries to use the missing frame.
    raise

print("\nInitial Data Overview:")
print(shark_tank_data.head())
print("\nColumn Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so wrapping
# it in print() would append a stray "None" line after the report.
shark_tank_data.info()



Data loaded successfully.

Initial Data Overview:
   Season Number      Startup Name  Episode Number  Pitch Number Season Start  \
0              1     BluePineFoods               1             1    20-Dec-21   
1              1      BoozScooters               1             2    20-Dec-21   
2              1  HeartUpMySleeves               1             3    20-Dec-21   
3              1         TagzFoods               2             4    20-Dec-21   
4              1      HeadAndHeart               2             5    20-Dec-21   

  Season End Original Air Date                Episode Title           Anchor  \
0   4-Feb-22         20-Dec-21  Badlegi Business Ki Tasveer  Rannvijay Singh   
1   4-Feb-22         20-Dec-21  Badlegi Business Ki Tasveer  Rannvijay Singh   
2   4-Feb-22         20-Dec-21  Badlegi Business Ki Tasveer  Rannvijay Singh   
3   4-Feb-22         21-Dec-21      Insaan, Ideas Aur Sapne  Rannvijay Singh   
4   4-Feb-22         21-Dec-21      Insaan, Ideas Aur Sapne  Ra

In [56]:
# 2. Data Cleaning and Organization

# Per-investor amount columns.
investment_columns = [
    'Namita Investment Amount',
    'Vineeta Investment Amount',
    'Anupam Investment Amount',
    'Aman Investment Amount',
    'Peyush Investment Amount',
    'Ritesh Investment Amount',
    'Amit Investment Amount',
    'Guest Investment Amount',
]

# Equity columns (overall and per investor).
equity_columns = [
    'Original Offered Equity',
    'Total Deal Equity',
    'Namita Investment Equity',
    'Vineeta Investment Equity',
    'Anupam Investment Equity',
    'Aman Investment Equity',
    'Peyush Investment Equity',
    'Ritesh Investment Equity',
    'Amit Investment Equity',
    'Guest Investment Equity',
]

# Every column that goes through the currency cleaner.
currency_columns = ['Total Deal Amount', 'Valuation Requested', *investment_columns, 'Yearly Revenue']

# Strip currency formatting, then impute whatever could not be parsed with 0 (for simplicity).
for col in currency_columns:
    shark_tank_data[col] = clean_currency_and_convert_to_float(shark_tank_data[col]).fillna(0)

# Equity values are parsed directly as numbers; unparseable or missing entries become 0.
for col in equity_columns:
    shark_tank_data[col] = pd.to_numeric(shark_tank_data[col], errors='coerce').fillna(0)


In [58]:
# 3. Feature Engineering and Analysis
# The cleaning step ("2. Data Cleaning and Organization") lives in the previous cell,
# which also defines investment_columns. It used to be copy-pasted here as well;
# re-cleaning already-clean columns changes nothing, so the duplicate was dropped.

# a. Success Metrics
# NOTE(review): a pitch is labelled 'Successful' when 'Received Offer' == 1. Receiving an
# offer may not be the same as closing a deal — confirm this is the intended definition.
shark_tank_data['Deal Success'] = np.where(shark_tank_data['Received Offer'] == 1, 'Successful', 'Unsuccessful')

# b. Investment per Shark
# Count the non-zero investment amounts in each row. Missing values are treated as
# "no investment"; a plain astype(bool) would count NaN as True.
shark_tank_data['Total Sharks Invested'] = shark_tank_data[investment_columns].fillna(0).ne(0).sum(axis=1)

# c. Calculate the total amount invested by each shark.
shark_investment_by_shark = shark_tank_data[investment_columns].sum()

# d. Find startups with multiple sharks investing
multiple_shark_deals = shark_tank_data[shark_tank_data['Total Sharks Invested'] > 1].shape[0]

# e. Calculate number of deals per industry
# NOTE(review): this counts every row per industry, whatever its 'Deal Success' label,
# so 'Number of Deals' is really a row count — confirm that is what the report should show.
deals_per_industry = shark_tank_data['Industry'].value_counts().reset_index()
deals_per_industry.columns = ['Industry', 'Number of Deals']

# f. Calculate the success rate per industry.
# The mean of a boolean flag is the share of 'Successful' rows in each industry. Unlike
# selecting a 'Successful' column from an unstacked value_counts table, this does not
# raise KeyError when no row carries that label (the rate is simply 0.0).
industry_success_rate = (
    shark_tank_data['Deal Success']
    .eq('Successful')
    .groupby(shark_tank_data['Industry'])
    .mean()
    .reset_index()
)
industry_success_rate.columns = ['Industry', 'Success Rate']


In [59]:
# 4. Founder Analysis
#  -- 'Number of Presenters' is used as a stand-in for the number of founders.
# Coerce 'Pitchers Average Age' to numbers; entries that cannot be parsed become NaN.
# NOTE(review): if this column holds labels rather than numbers, every value turns
# into NaN and the averages below are empty — confirm the column's contents.
shark_tank_data['Pitchers Average Age'] = pd.to_numeric(
    shark_tank_data['Pitchers Average Age'],
    errors='coerce',
)

# Mean age for each presenter count, as a two-column frame.
avg_age_by_presenters = (
    shark_tank_data
    .groupby('Number of Presenters')['Pitchers Average Age']
    .mean()
    .rename('Average Age')
    .reset_index()
)

# 5. Funding Amount Analysis
total_investment_amount = shark_tank_data['Total Deal Amount'].sum()




In [60]:
# 6. Output Results (for Python)
# Each titled section is emitted as "title, newline, table" in a single print call.
print("\n--- Project Analysis ---")
print("\nTotal Investment by Each Shark:", shark_investment_by_shark, sep="\n")

print(f"\nNumber of Deals with Multiple Sharks: {multiple_shark_deals}")

print("\nDeals per Industry:", deals_per_industry, sep="\n")

print("\nIndustry Success Rates:", industry_success_rate, sep="\n")

print("\nAverage Pitcher Age by Number of Presenters:", avg_age_by_presenters, sep="\n")

print(f"\nTotal Investment Amount: {total_investment_amount}")





--- Project Analysis ---

Total Investment by Each Shark:
Namita Investment Amount     4061.839268
Vineeta Investment Amount    2987.294500
Anupam Investment Amount     3397.330768
Aman Investment Amount       5453.717516
Peyush Investment Amount     4072.914753
Ritesh Investment Amount     2198.484500
Amit Investment Amount       1234.400000
Guest Investment Amount      2878.682252
dtype: float64

Number of Deals with Multiple Sharks: 198

Deals per Industry:
                        Industry  Number of Deals
0              Food and Beverage              142
1                 Beauty/Fashion              127
2            Technology/Software               58
3                 Medical/Health               56
4                  Manufacturing               52
5                 Lifestyle/Home               43
6              Business Services               42
7             Children/Education               25
8   Vehicles/Electrical Vehicles               20
9        Fitness/Sports/Outdoors  

In [61]:
# 7. Prepare data for Tableau

# Create a new dataframe for shark investments: one row per investment column,
# holding that column's summed amount.
shark_investments_df = pd.DataFrame(shark_investment_by_shark).reset_index()
shark_investments_df.columns = ['Shark', 'Investment Amount']

# Add a success rate column.
# Drop any 'Success Rate' left behind by an earlier run of this cell first; otherwise a
# second merge would produce 'Success Rate_x' / 'Success Rate_y' instead of one column.
# validate= guards against duplicate industries on the right side multiplying rows.
shark_tank_data = (
    shark_tank_data
    .drop(columns='Success Rate', errors='ignore')
    .merge(industry_success_rate, on='Industry', how='left', validate='many_to_one')
)

# Save the dataframes to CSV files.  These will be used in Tableau.
shark_tank_data.to_csv("shark_tank_data_for_tableau.csv", index=False)
shark_investments_df.to_csv("shark_investments_for_tableau.csv", index=False)

print("\nData has been successfully prepared and saved to 'shark_tank_data_for_tableau.csv' and 'shark_investments_for_tableau.csv'.  These are ready for visualization in Tableau.")


Data has been successfully prepared and saved to 'shark_tank_data_for_tableau.csv' and 'shark_investments_for_tableau.csv'.  These are ready for visualization in Tableau.
