In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Seed NumPy's global RNG so the sampled datasets are reproducible on a fresh run
np.random.seed(42)

# Dataset D1: two classes that are intended to be linearly separable.
# Each class is drawn from a 2-D Gaussian with the identity covariance
# matrix [[1, 0], [0, 1]]; the class means are [2, 2] and [6, 6].
# NOTE(review): if a different spread is required (e.g. 0.5 on the diagonal),
# change cov_class_*_D1 below -- the code uses the identity matrix.
n_per_class_D1 = 100  # number of vectors sampled for each class of D1

mean_class_0_D1 = [2, 2]
cov_class_0_D1 = [[1, 0], [0, 1]]
class_0_D1 = np.random.multivariate_normal(mean_class_0_D1, cov_class_0_D1, n_per_class_D1)

mean_class_1_D1 = [6, 6]
cov_class_1_D1 = [[1, 0], [0, 1]]
class_1_D1 = np.random.multivariate_normal(mean_class_1_D1, cov_class_1_D1, n_per_class_D1)

In [3]:
# Dataset D2: two classes that are intended NOT to be linearly separable.
# Each class is drawn from a 2-D Gaussian with the identity covariance
# matrix [[1, 0], [0, 1]]; the class means are [3, 3] and [4, 4], so the
# two clouds overlap heavily.
# NOTE(review): if a different spread is required (e.g. 1.5 on the diagonal),
# change cov_class_*_D2 below -- the code uses the identity matrix.
n_per_class_D2 = 100  # number of vectors sampled for each class of D2

mean_class_0_D2 = [3, 3]
cov_class_0_D2 = [[1, 0], [0, 1]]
class_0_D2 = np.random.multivariate_normal(mean_class_0_D2, cov_class_0_D2, n_per_class_D2)

mean_class_1_D2 = [4, 4]
cov_class_1_D2 = [[1, 0], [0, 1]]
class_1_D2 = np.random.multivariate_normal(mean_class_1_D2, cov_class_1_D2, n_per_class_D2)

In [4]:
# Append the class label (0.0 or 1.0) as a third column to every sample and
# stack both classes into one array per dataset: columns 0-1 hold the two
# features, column 2 holds the label.
# Only the first two (feature) columns are kept before the label is appended,
# so re-running this cell does not keep adding label columns, and the label
# column is sized from the data instead of a hard-coded row count.
class_0_D1 = np.hstack((class_0_D1[:, :2], np.zeros((len(class_0_D1), 1))))
class_1_D1 = np.hstack((class_1_D1[:, :2], np.ones((len(class_1_D1), 1))))
D1 = np.vstack((class_0_D1, class_1_D1))

class_0_D2 = np.hstack((class_0_D2[:, :2], np.zeros((len(class_0_D2), 1))))
class_1_D2 = np.hstack((class_1_D2[:, :2], np.ones((len(class_1_D2), 1))))
D2 = np.vstack((class_0_D2, class_1_D2))

In [5]:
# Stratified train/test split of each dataset.
# stratify=<label column> keeps the class ratio identical in both parts, and
# test_size=0.1 holds out 10% of the rows, i.e. 10 vectors from each class.
# random_state pins the shuffle, so the split stays the same when this cell is
# re-run and does not depend on how much of NumPy's global RNG stream earlier
# cells have consumed.
train_D1, test_D1 = train_test_split(D1, test_size=0.1, stratify=D1[:, 2], random_state=42)
train_D2, test_D2 = train_test_split(D2, test_size=0.1, stratify=D2[:, 2], random_state=42)

# Separate each part into X (the two feature columns) and y (the label column)
X_train_D1, y_train_D1 = train_D1[:, :2], train_D1[:, 2]
X_test_D1, y_test_D1 = test_D1[:, :2], test_D1[:, 2]

X_train_D2, y_train_D2 = train_D2[:, :2], train_D2[:, 2]
X_test_D2, y_test_D2 = test_D2[:, :2], test_D2[:, 2]

In [6]:
# Sanity check: show the feature/label shapes of every split, one line per
# split, in the order D1 train, D1 test, D2 train, D2 test.
print(
    *(
        f"{feats.shape} {labels.shape}"
        for feats, labels in (
            (X_train_D1, y_train_D1),
            (X_test_D1, y_test_D1),
            (X_train_D2, y_train_D2),
            (X_test_D2, y_test_D2),
        )
    ),
    sep="\n",
)

(180, 2) (180,)
(20, 2) (20,)
(180, 2) (180,)
(20, 2) (20,)
