# Generate 16-qubit dataset

## Import libraries

In [1]:
import pandas as pd 
import numpy as np

## Read Input file

In [2]:
# Load the raw table. header=None makes pandas label the columns with integers
# instead of consuming the first row as a header; column 32 is then renamed
# to 'target'.
df = pd.read_csv('Data_16qubit.csv', header=None).rename(columns={32: 'target'})

# Summary statistics, transposed so that each row describes one column.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,32.0,-1.07427,0.7726266,-1.27062,-1.27062,-1.27062,-1.27062,1.870973
1,32.0,1.16829,0.6567383,-1.335189,1.335189,1.335189,1.335189,1.335189
2,32.0,-0.09873,1.055609,-0.491429,-0.491429,-0.491429,-0.491429,2.650164
3,32.0,-0.642903,0.9040966,-3.736997,-0.595404,-0.595404,-0.595404,0.595404
4,32.0,-1.368133,0.7726266,-1.564482,-1.564482,-1.564482,-1.564482,1.57711
5,32.0,-0.923172,0.8977604,-3.972246,-0.830654,-0.830654,-0.830654,0.830654
6,32.0,-0.627402,0.7726266,-0.823752,-0.823752,-0.823752,-0.823752,2.317841
7,32.0,0.452158,0.8320418,-2.400441,0.741152,0.741152,0.741152,0.741152
8,32.0,0.182112,0.7726266,-0.014237,-0.014237,-0.014237,-0.014237,3.127355
9,32.0,0.585505,0.8630431,-2.248044,0.893549,0.893549,0.893549,0.893549


## Add noise to the dataset

In [3]:
# Augment the dataset with noisy copies of every original row.
#
# Each feature value x is replaced by x * (1 + eps), with eps ~ N(mu, sigma),
# i.e. the noise is multiplicative (relative), not additive.
#
# NOTE: this cell rebinds `df` to the augmented table, so it is not idempotent.
# Re-run the cell that loads the CSV before re-running this one.

# mean and standard deviation of the relative noise
mu, sigma = 0, 0.001

# number of noisy copies generated per original row
N_COPIES = 5

# fixed seed so that the noise and the shuffle are reproducible across runs
SEED = 42
rng = np.random.default_rng(SEED)

# Keep a handle on the un-augmented rows. Noise must always be applied to
# these, never to the growing concatenated frame.
df_original = df

# every column except the label
cols = df_original.columns.drop('target')

# Build all noisy copies first and concatenate once at the end.
noisy_frames = []
for _ in range(N_COPIES):
    # one independent noise draw per cell of the feature matrix
    noise = rng.normal(mu, sigma, size=(len(df_original), len(cols)))
    df_noisy = df_original[cols] * (1 + noise)
    # noisy rows keep the label of the row they were derived from
    df_noisy['target'] = df_original['target']
    noisy_frames.append(df_noisy)

# original rows followed by the N_COPIES noisy versions
df = pd.concat([df_original, *noisy_frames], axis=0, ignore_index=True)

# sanity check: original rows plus N_COPIES noisy versions of each
assert len(df) == len(df_original) * (N_COPIES + 1)

# relabel class 2 as -1
df.loc[df['target'] == 2, 'target'] = -1

# shuffle the DataFrame rows (seeded, so the row order is reproducible)
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# display the duplicated and noisy DataFrame
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,target
0,-1.270847,1.337172,-0.491095,-0.595650,-1.565426,-0.829844,-0.823898,0.740915,-0.014236,0.894181,...,1.352356,0.993327,0.845913,-1.303034,0.519285,0.665135,1.217107,-0.729539,-0.807532,-1
1,-1.270620,1.335189,2.650164,0.595404,-1.564482,-0.830654,-0.823752,0.741152,-0.014237,-2.248044,...,1.350943,0.994314,0.846423,-1.302501,0.518371,0.665062,1.216846,-0.728822,-0.807148,1
2,-1.270620,1.335189,-0.491429,-0.595404,-1.564482,-0.830654,2.317841,-0.741152,-0.014237,0.893549,...,1.350943,0.994314,0.846423,-1.302501,0.518371,0.665062,1.216846,-0.728822,-0.807148,1
3,-1.269843,1.334592,-0.491222,-0.595409,-1.566719,-0.831160,-0.824741,0.740864,3.127239,-0.891812,...,1.352791,0.995663,0.847445,-1.304001,0.517466,0.664571,1.219458,-0.729137,-0.807816,1
4,-1.269660,1.334089,2.650619,0.595782,-1.564283,-0.830705,-0.823811,0.741915,-0.014224,-2.248315,...,1.352012,0.993273,0.846797,-1.302167,0.518372,0.664504,1.217278,-0.728820,-0.806813,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,-1.271378,1.331826,-0.490762,-0.594628,-1.561456,-0.831632,-0.822911,0.741261,-0.014219,0.894422,...,1.348094,0.994727,0.846337,1.842987,-0.517403,0.666081,-1.928618,-0.729309,-0.806832,-1
188,-1.270620,1.335189,-0.491429,-0.595404,-1.564482,-0.830654,-0.823752,0.741152,-0.014237,0.893549,...,1.350943,-2.147279,-0.846423,-1.302501,0.518371,0.665062,1.216846,-0.728822,2.334444,-1
189,-1.270620,1.335189,-0.491429,-0.595404,-1.564482,-0.830654,-0.823752,0.741152,-0.014237,0.893549,...,1.350943,0.994314,0.846423,-1.302501,0.518371,0.665062,1.216846,-0.728822,-0.807148,-1
190,-1.271410,1.335741,-0.492136,-0.593331,-1.562900,-0.830301,-0.822724,0.741188,3.133042,-0.892194,...,1.348354,0.994947,0.846041,-1.301970,0.517649,0.665556,1.216149,-0.728889,-0.806466,-1


In [4]:
# Write the augmented table to disk; index=False leaves the row index out of
# the CSV.
output_path = 'Data_16qubit_noisy_v3.csv'
df.to_csv(output_path, index=False)