# Simple Random Sampling

In [1]:
import pandas as pd

In [2]:
data=pd.read_csv("Dataset/startup_funding.csv")
data.head()

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,


In [3]:
amount=data['AmountInUSD']

In [4]:
amount.head()
#We have NaN values and in string form

0    1,300,000
1          NaN
2          NaN
3      500,000
4      850,000
Name: AmountInUSD, dtype: object

In [5]:
#dropping NaN values
amount.dropna(inplace=True)
amount.head()

0    1,300,000
3      500,000
4      850,000
5    1,000,000
6    2,600,000
Name: AmountInUSD, dtype: object

In [6]:
#replacing commas in string
a1=amount.str.replace(',','')
a1.head()

0    1300000
3     500000
4     850000
5    1000000
6    2600000
Name: AmountInUSD, dtype: object

In [7]:
#now this is of str type data
a1[0]

'1300000'

In [8]:
a2=pd.to_numeric(a1)
#converting string to numerical value

In [9]:
a2[0]

1300000

In [10]:
type(a2[0])

numpy.int64

In [11]:
#population avg,min and max values:
pop_avg=a2.mean()
pop_min=a2.min()
pop_max=a2.max()
print(pop_avg)
print(pop_min)
print(pop_max)

12031073.099016393
16000
1400000000


In [12]:
population=a2
sample_size=100
population.sample()
#by .sample() we can randomly select some record(by default it gives one record)

1319    10000000
Name: AmountInUSD, dtype: int64

In [13]:
population.sample(10)
#sample function selects random samples withpot replacement

1833      250000
2345       16000
2279    40000000
1050       89000
1466     3000000
1587     3000000
1539      200000
1376      520000
1548      500000
2360    56000000
Name: AmountInUSD, dtype: int64

In [14]:
sample=population.sample(sample_size,random_state=1)
#random state argument ensures that random selected sample remains same 

In [15]:
sample.shape

(100,)

In [16]:
sample_avg=sample.mean()
sample_min=sample.min()
sample_max=sample.max()
print(pop_avg," ",sample_avg)
print(pop_min," ",sample_min)
print(pop_max," ",sample_max)

12031073.099016393   24592930.0
16000   18000
1400000000   1400000000


In [17]:
#Average amount of funding for Banglore or New Delhi(Without Replacement)
data=pd.read_csv("Dataset/startup_funding.csv")
data.head()

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,


In [18]:
BA_ND = data[(data["CityLocation"] == "Bangalore") | (data["CityLocation"] == "New Delhilen")]
len(BA_ND)

627

In [19]:
#working upon population
amount=BA_ND['AmountInUSD']
amount.head()

0     1,300,000
5     1,000,000
8     8,500,000
11          NaN
13    1,000,000
Name: AmountInUSD, dtype: object

In [20]:
amount_copy = amount.copy()
amount_copy.dropna(inplace=True)
amount_copy.head()

0      1,300,000
5      1,000,000
8      8,500,000
13     1,000,000
19    18,500,000
Name: AmountInUSD, dtype: object

In [21]:
a1=amount_copy.str.replace(',','')
a1.head()
a1[0]

'1300000'

In [22]:
a2=pd.to_numeric(a1)

In [23]:
len(a2)

405

In [24]:
#population:
pop_mean=a2.mean()
print(pop_mean)

20700676.809876543


In [25]:
sample_size=50
sample=a2.sample(sample_size,random_state=1)

In [26]:
#sample:
sample_mean=sample.mean()
print(sample_mean)

9405570.0


In [27]:
sampling_error=pop_mean-sample_mean
sampling_error

11295106.809876543

In [28]:
#Average amount of funding for Banglore or New Delhi(Replacement)
data=pd.read_csv("Dataset/startup_funding.csv")
data.head()

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,


In [29]:
reqdata=data[(data["CityLocation"]=="Bangalore") | (data["CityLocation"]=="New Delhi")]

In [30]:
amount=reqdata['AmountInUSD']

In [31]:
amount_copy = amount.copy()
amount_copy.dropna(inplace=True)
amount_copy.head()

0      1,300,000
5      1,000,000
8      8,500,000
13     1,000,000
19    18,500,000
Name: AmountInUSD, dtype: object

In [32]:
amount_copy = amount_copy.str.replace(',', '')  
# Remove commas from the string
amount_copy = pd.to_numeric(amount_copy)

In [33]:
#population:
pop_mean=amount_copy.mean()
print(pop_mean)

18045415.896272287


In [34]:
#sample:
sample_size=50
sample=amount_copy.sample(sample_size,replace=True)
sample_mean=sample.mean()
print(sample_mean)

21243700.0


In [35]:
sampling_error=pop_mean-sample_mean
print(sampling_error)

-3198284.103727713


## Stratified Sampling

In [36]:
data=pd.read_csv("Dataset/startup_funding.csv")
data.head(10)

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
0,0,01/08/2017,TouchKin,Technology,Predictive Care Platform,Bangalore,Kae Capital,Private Equity,1300000.0,
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
2,2,02/08/2017,Leverage Edu,Consumer Internet,Online platform for Higher Education Services,New Delhi,"Kashyap Deorah, Anand Sankeshwar, Deepak Jain,...",Seed Funding,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000.0,
4,4,02/08/2017,Click2Clinic,Consumer Internet,healthcare service aggregator,Hyderabad,"Narottam Thudi, Shireesh Palle",Seed Funding,850000.0,
5,5,01/07/2017,Billion Loans,Consumer Internet,Peer to Peer Lending platform,Bangalore,Reliance Corporate Advisory Services Ltd,Seed Funding,1000000.0,
6,6,03/07/2017,Ecolibriumenergy,Technology,Energy management solutions provider,Ahmedabad,"Infuse Ventures, JLL",Private Equity,2600000.0,
7,7,04/07/2017,Droom,eCommerce,Online marketplace for automobiles,Gurgaon,"Asset Management (Asia) Ltd, Digital Garage Inc",Private Equity,20000000.0,
8,8,05/07/2017,Jumbotail,eCommerce,online marketplace for food and grocery,Bangalore,"Kalaari Capital, Nexus India Capital Advisors",Private Equity,8500000.0,
9,9,05/07/2017,Moglix,eCommerce,B2B marketplace for Industrial products,Noida,"International Finance Corporation, Rocketship,...",Private Equity,12000000.0,


In [37]:
Bangalore=data["CityLocation"] == "Bangalore"
NCR = ((data["CityLocation"] == "New Delhi") | (data["CityLocation"] == "Gurgaon") | (data["CityLocation"] == "Noida"))
Mumbai=data["CityLocation"] == "Mumbai"
Pune=data["CityLocation"] == "Pune"
Hydreabad=data["CityLocation"] == "Hydreabad"

In [38]:
group_a=data[Bangalore]  #Strata A
group_b=data[NCR]        #Strata B
group_c=data[Mumbai]     #Strata C
group_d=data[Pune]       #Strata D
group_e=data[Hydreabad]  #Strata E

In [39]:
group_c

Unnamed: 0,SNo,Date,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountInUSD,Remarks
1,1,02/08/2017,Ethinos,Technology,Digital Marketing Agency,Mumbai,Triton Investment Advisors,Private Equity,,
3,3,02/08/2017,Zepo,Consumer Internet,DIY Ecommerce platform,Mumbai,"Kunal Shah, LetsVenture, Anupam Mittal, Hetal ...",Seed Funding,500000,
10,10,05/07/2017,Timesaverz,Consumer Internet,Hyperlocal home services provider,Mumbai,BCCL,Private Equity,1000000,
17,17,11/07/2017,Flickstree,Consumer Internet,Video Content Discovery Platform,Mumbai,"Venture Catalysts, Sourav Ganguly, Ankit Adity...",Seed Funding,464000,
45,45,28/07/2017,Jhakaas,Consumer Internet,App-based Aggregator of Offline Businesses,Mumbai,Amen Dhyllon,Seed Funding,,
...,...,...,...,...,...,...,...,...,...,...
2169,2169,18/04/2015,Doormint,On-Demand Handyman Services,,Mumbai,Powai Lake Ventures,Seed Funding,,
2179,2179,23/04/2015,Craftstvilla,Ethnic Product eCommerce,,Mumbai,"Sequoia Capital, Nexus venture Partners, Light...",Private Equity,18000000,Series B
2185,2185,24/04/2015,Coverfox,Online Insurance Aggregator,,Mumbai,"Accel US, Accel India, SAIF Partners.",Private Equity,12000000,Series B
2187,2187,27/04/2015,Grab,Food Logistics & Delivery,,Mumbai,"Oliphans Capital, Haresh Chawla",Seed Funding,1000000,


In [40]:
a=group_a.copy()
b=group_b.copy()
c=group_c.copy()
d=group_d.copy()
e=group_e.copy()
a.dropna(inplace=True)
b.dropna(inplace=True)
c.dropna(inplace=True)
d.dropna(inplace=True)
e.dropna(inplace=True)

In [41]:
a1=a.str.replace(',','')
b1=b.str.replace(',','')
c1=c.str.replace(',','')
d1=d.str.replace(',','')
e1=e.str.replace(',','')

AttributeError: 'DataFrame' object has no attribute 'str'