# Data Sampling

In [28]:
from sklearn.datasets import load_iris
import pandas as pd

# Fetch the Iris bundle and wrap its feature matrix in a DataFrame whose
# columns are labelled with the dataset's own feature names.
data = load_iris()
df = pd.DataFrame(data=data["data"], columns=data["feature_names"])
# head() shows the first 5 rows by default.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
# Dimensions of the full dataset as a (rows, columns) tuple.
df.shape

(150, 4)

# Random Sampling

Knowing the exact number of samples to return

In [5]:
# Draw exactly 100 rows at random (pandas samples without replacement by
# default). A fixed random_state makes the draw repeatable, so the notebook
# yields the same subset on every Restart & Run All.
subset = df.sample(n=100, random_state=42)
subset.shape

(100, 4)

Knowing the percentage of samples to return

In [7]:
# Draw a random 50% of the rows (pandas samples without replacement by
# default). A fixed random_state makes the draw repeatable, so the notebook
# yields the same subset on every Restart & Run All.
subset = df.sample(frac=0.5, random_state=42)
subset.shape

(75, 4)

# Sampling with condition
Return 10 random samples where sepal width (cm) < 3.
First, count the number of records that satisfy the condition.

In [31]:
# Boolean mask over the rows: True where the sepal width is strictly below 3.
condition = df['sepal width (cm)'].lt(3)
condition

0      False
1      False
2      False
3      False
4      False
       ...  
145    False
146     True
147    False
148    False
149    False
Name: sepal width (cm), Length: 150, dtype: bool

In [32]:
# Index labels of the rows that satisfy the condition. A boolean Series can
# be used as its own mask, so comparing it against True is unnecessary.
true_index = condition[condition].index
# Number of rows available for conditional sampling.
len(true_index)

57

Since 57 rows satisfy the condition, we can sample at most 57 rows (without replacement).

In [33]:
# Keep only the rows matching the condition, then draw 10 of them at random.
# A fixed random_state makes the draw repeatable, so the notebook yields the
# same subset on every Restart & Run All.
subset = df.loc[condition].sample(n=10, random_state=42)
subset.shape

(10, 4)

# Sampling at a Constant Rate
Sample every 10th row

In [8]:
# Systematic sampling: keep one row out of every `rate`, starting from the
# first row (positional slice with a step).
rate = 10
subset = df.iloc[::rate]
subset.shape

(15, 4)

In [9]:
# Preview the first rows of the sampled subset (head() defaults to 5 rows).
subset.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
10,5.4,3.7,1.5,0.2
20,5.4,3.4,1.7,0.2
30,4.8,3.1,1.6,0.2
40,5.0,3.5,1.3,0.3


# Getting the remaining part of the dataset
First Solution: drop the rows whose index labels appear in the sampled subset

In [23]:
# Complement of the sample: remove every row whose index label appears in
# `subset`, leaving the rows that were not selected.
remaining = df.drop(index=subset.index)
remaining.shape

(135, 4)

In [24]:
# Preview the first rows that were left out of the subset (head() defaults to 5 rows).
remaining.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4


Second Solution: keep the rows whose index labels are not in the sampled subset, using a boolean mask

In [22]:
# Complement of the sample via a boolean mask: isin() flags the index labels
# present in `subset`, and ~ inverts the flags to keep the unselected rows.
remaining = df.loc[~df.index.isin(subset.index)]
remaining.shape

(135, 4)

In [25]:
# Preview the first rows that were left out of the subset (head() defaults to 5 rows).
remaining.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
