In [1]:
import numpy as np
import pandas as pd
import re
import random
from bs4 import BeautifulSoup as bs
import requests
import datetime


In [2]:
np.random.seed(42)

In [3]:
df = pd.read_csv("Final Name - Gender - Location.csv", index_col=[0])

In [4]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255
...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951


In [5]:
df['Flexibility'] = np.random.randint(1, 6, df.shape[0])

In [6]:
df['Flexibility'].value_counts()

1    296
5    284
2    271
4    269
3    267
Name: Flexibility, dtype: int64

In [7]:
df['Body Composition'] = np.random.choice(['Essential Fat', 'Athletes', 'Fitness', 'Acceptable', 'Obese'], df.shape[0])

In [8]:
df['Body Composition']

0       Essential Fat
1             Fitness
2          Acceptable
3            Athletes
4       Essential Fat
            ...      
1382          Fitness
1383          Fitness
1384          Fitness
1385          Fitness
1386         Athletes
Name: Body Composition, Length: 1387, dtype: object

## Creating Composition percentage pattern according to compositions https://www.verywellfit.com/what-is-body-composition-3495614#:~:text=Body%20composition%20is%20the%20proportion,muscle%2C%20bones%2C%20and%20organs.

In [9]:
# Description	Women	Men
# Essential Fat	10% to 13%	2% to 5%
# Athletes	14% to 20%	6% to 13%
# Fitness	21% to 24%	14% to 17%
# Acceptable	25% to 31%	18% to 24%
# Obese	Over 32%	Over 25%

In [10]:
conditions = [
    (df['Gender'] == 'Male') & (df['Body Composition'] == 'Essential Fat'),
    (df['Gender'] == 'Male') & (df['Body Composition'] == 'Athletes'),
    (df['Gender'] == 'Male') & (df['Body Composition'] == 'Fitness'),
    (df['Gender'] == 'Male') & (df['Body Composition'] == 'Acceptable') ,
    (df['Gender'] == 'Male') & (df['Body Composition'] == 'Obese') ,
    (df['Gender'] == 'Female') & (df['Body Composition'] == 'Essential Fat') ,
    (df['Gender'] == 'Female') & (df['Body Composition'] == 'Athletes') ,
    (df['Gender'] == 'Female') & (df['Body Composition'] == 'Fitness') ,
    (df['Gender'] == 'Female') & (df['Body Composition'] == 'Acceptable') ,
    (df['Gender'] == 'Female') & (df['Body Composition'] == 'Obese') ,
    ]



values = [random.randint(2,6), random.randint(6,14), random.randint(14,18),random.randint(18,25), random.randint(25,100),
         random.randint(10,14), random.randint(14,21), random.randint(21,25), random.randint(25,32),random.randint(32,100) ]


df['Composition Percentage'] = np.select(conditions, values)

df.head()


Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3


In [11]:
df['Interactiveness'] = np.random.randint(1, 6, df.shape[0])

In [12]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3
...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5


In [19]:
df['Workout Frequency'] = np.random.choice(['Only Weekends', 'Daily', 'Alternate Days', 'Monday To Friday', 'Any Day of Week',  '%s days of week'%np.random.randint(2,5)], df.shape[0])

In [20]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday
...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends


In [21]:
df['Workout Mode Preferred'] = np.random.choice(['Indoor', 'Outdoor', 'Both'], df.shape[0])

In [22]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor


In [23]:
df['is_sports_enthusiast'] = np.random.choice(['Yes', 'No', 'Maybe'], df.shape[0])

In [24]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe


In [29]:
df['Body Physique Type'] = np.random.choice(['Ectomorph', 'Endomorph', 'Mesomorph',None], df.shape[0])

In [33]:
df = df.drop('Body Physique Type', axis=1)

In [34]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe


In [35]:
df['like_to_be_mentored'] = np.random.randint(1, 6, df.shape[0])
df['like_to_mentor'] = np.random.randint(1, 6, df.shape[0])

In [36]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast,like_to_be_mentored,like_to_mentor
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe,1,5
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No,3,4
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No,3,1
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No,1,4
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No,3,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe,3,4
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No,2,5
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes,1,1
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe,4,2


In [37]:
df['fitness_goal_to_be achieved'] = np.random.choice(['Lose fat', 'Build muscle', 'Mesomorph','Improve endurance/conditioning', 'Improve athletic skills', 'All'], df.shape[0])

In [38]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast,like_to_be_mentored,like_to_mentor,fitness_goal_to_be achieved
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe,1,5,Build muscle
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No,3,4,Improve endurance/conditioning
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No,3,1,All
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No,1,4,Lose fat
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No,3,5,Lose fat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe,3,4,Improve athletic skills
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No,2,5,Improve endurance/conditioning
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes,1,1,All
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe,4,2,All


In [39]:
df['addons in session'] = np.random.choice(['Meditation', 'Yoga', 'Both','None'], df.shape[0])

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast,like_to_be_mentored,like_to_mentor,fitness_goal_to_be achieved,addons in session
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe,1,5,Build muscle,Yoga
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No,3,4,Improve endurance/conditioning,Meditation
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No,3,1,All,Meditation
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No,1,4,Lose fat,
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No,3,5,Lose fat,Yoga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe,3,4,Improve athletic skills,Meditation
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No,2,5,Improve endurance/conditioning,Yoga
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes,1,1,All,Yoga
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe,4,2,All,Both


In [52]:
# Output:
df['Height_in_cms'] = np.random.randint(160, 179, df.shape[0])

In [55]:
df = df.drop('Height', axis=1)

In [56]:
df

Unnamed: 0,Name,Gender,DOB,Name Of Area,Pincode,Latitude,Longitude,Flexibility,Body Composition,Composition Percentage,Interactiveness,Workout Frequency,Workout Mode Preferred,is_sports_enthusiast,like_to_be_mentored,like_to_mentor,fitness_goal_to_be achieved,addons in session,Height_in_cms
0,Amoli Veer,Female,1982-03-31,Nana Peth S.O,411002,18.51009,73.85672,4,Essential Fat,13,4,Monday To Friday,Outdoor,Maybe,1,5,Build muscle,Yoga,166
1,Kaia Subramanian,Female,1994-06-09,Mundhva AV S.O,411036,18.53135,73.91478,5,Fitness,25,5,Any Day of Week,Outdoor,No,3,4,Improve endurance/conditioning,Meditation,163
2,Rushil Vish,Male,1999-02-07,Yerwada S.O,411006,18.56318,73.87742,3,Acceptable,19,3,4 days of week,Indoor,No,3,1,All,Meditation,168
3,Atiksh Manohar,Male,1987-07-01,Dapodi S.O,411012,18.57749,73.83138,5,Athletes,6,3,Only Weekends,Both,No,1,4,Lose fat,,174
4,Ved Kapoor,Male,1988-01-28,Hadapsar S.O,411028,18.50453,73.91255,5,Essential Fat,3,3,Monday To Friday,Both,No,3,5,Lose fat,Yoga,165
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,Neysa Rai,Female,1989-12-10,Mohamadwadi S.O,411060,18.47335,73.90957,2,Fitness,25,4,Any Day of Week,Indoor,Maybe,3,4,Improve athletic skills,Meditation,177
1383,Raunak Kapoor,Male,1984-08-12,Masulkar Colony S.O,411018,18.63374,73.81290,3,Fitness,14,1,Daily,Indoor,No,2,5,Improve endurance/conditioning,Yoga,165
1384,Taara Veer,Female,1995-06-06,Masulkar Colony S.O,411018,18.63374,73.81290,5,Fitness,25,1,Monday To Friday,Both,Yes,1,1,All,Yoga,161
1385,Sarthak Rai,Male,1997-09-03,Guruwar Peth S.O,411042,18.50547,73.85951,5,Fitness,14,5,Only Weekends,Indoor,Maybe,4,2,All,Both,172
