In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


with open('AI_2qubits_training_data.txt', 'r') as file:
    data = file.readlines()
    
type_1=data[:2000]
type_2=data[2001:4000]
type_3=data[4001:6000]
delimiter = ' '
first_column_1 = [row.split(delimiter)[0] for row in type_1]
first_column_2 = [row.split(delimiter)[0] for row in type_2]
first_column_3 = [row.split(delimiter)[0] for row in type_3]

def average_frequency(first_column):
    frequencies_per_row = []
    for row in first_column:
        frequency_1 = 0
        frequency_0 = 0
        for bit in row:
            if bit == '1':
                frequency_1 += 1
            elif bit == '0':
                frequency_0 += 1
        total_bits = len(row)
    distribution = {'1': frequency_1 / total_bits, '0': frequency_0 / total_bits}
    frequencies_per_row.append(distribution)
        
    total_frequency_1 = 0
    total_frequency_0 = 0
    num_rows = len(frequencies_per_row)

    for distribution in frequencies_per_row:
        total_frequency_1 += distribution['1']
        total_frequency_0 += distribution['0']

    average_distribution = {
    '1': (total_frequency_1 / num_rows),
    '0': (total_frequency_0 / num_rows)
    }
    
    return(average_distribution)
                                   
print("Average Distribution of Type 1:",average_frequency(first_column_1))
print("Average Distribution of Type 2:",average_frequency(first_column_2))
print("Average Distribution of Type 3:",average_frequency(first_column_3))

def markov_chain(first_column):
    transition_pairs = ['01', '10', '00', '11']
    transition_matrix = {pair: [0]*4 for pair in transition_pairs}

    for row in first_column:
        for i in range(len(row)-1):
            transition=row[i:i+2]
            if transition in transition_pairs:
                transition_matrix[transition][transition_pairs.index(transition)] += 1
    return transition_matrix

print(markov_chain(first_column_1))
print(markov_chain(first_column_2))
print(markov_chain(first_column_3))

from sklearn.utils import shuffle

X=np.array([[int(bit) for bit in row[0]] for row in data])
y=np.array([int(row[1]) for row in data])
X_shuffled, y_shuffled = shuffle(X, y)
#Shuffled in order to prevent learning bias created by organized data.
X_train, X_test, y_train, y_test = train_test_split(X_shuffled, y_shuffled, test_size=0.3, random_state=42)

classifier = LogisticRegression()

classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Average Distribution of Type 1: {'1': 0.54, '0': 0.46}
Average Distribution of Type 2: {'1': 0.59, '0': 0.41}
Average Distribution of Type 3: {'1': 0.54, '0': 0.46}
{'01': [46402, 0, 0, 0], '10': [0, 46242, 0, 0], '00': [0, 0, 43427, 0], '11': [0, 0, 0, 61929]}
{'01': [45891, 0, 0, 0], '10': [0, 45643, 0, 0], '00': [0, 0, 40301, 0], '11': [0, 0, 0, 66066]}
{'01': [47812, 0, 0, 0], '10': [0, 47874, 0, 0], '00': [0, 0, 52716, 0], '11': [0, 0, 0, 49499]}
Accuracy: 0.5611111111111111
