In [17]:
import csv
import numpy as np
import os
import math
from sklearn.decomposition import IncrementalPCA


def list_to_numpy(data):
    # create a numpy array of the features
    x = np.zeros([len(data) - 1, len(data[0]) - 2])
    y = np.zeros([len(data) - 1], np.uint8)

    i = 0
    for t in data[1:]:
        j = 0
        for f in t[2:]:
            x[i][j] = float(f)
            j += 1
        y[i] = int(t[1])
        i += 1
    
    return x, y

In [18]:
base_dir = "D:\\ms-project-data\\UCF101\\iad"

layers = [0, 1, 2, 3, 4]

for layer in layers:
    iad_train_file = "train_100_%s.csv" % layer
    iad_test_file = "test_%s.csv" % layer

    train = []
    test = []

    with open(os.path.join(base_dir, iad_train_file), newline='') as csv_fd:
        csv_reader = csv.reader(csv_fd)
        for row in csv_reader:
            train.append(row)

    with open(os.path.join(base_dir, iad_test_file), newline='') as csv_fd:
        csv_reader = csv.reader(csv_fd)
        for row in csv_reader:
            test.append(row)
    print("train len = %s, test len = %s" % (len(train), len(test)))
    
    # convert the lists to numpy arrays
    train_x, train_y = list_to_numpy(train)
    test_x, test_y = list_to_numpy(test)
    print("train_x shape = %s" % str(train_x.shape))
    print("train_y shape = %s" % str(train_y.shape))
    print("test_x shape = %s" % str(test_x.shape))
    print("test_y shape = %s" % str(test_y.shape))
    print("layer %s sample data:" % layer)
    print(train_x[1])
    print(train_y[1])
    print(test_x[0])
    print(test_y[0])
    
    for n in range(2, 101):
        ipca = IncrementalPCA(n_components=n, batch_size=10000)
        ipca.fit(train_x)
        train_x_ipca = ipca.transform(train_x)
        test_x_ipca = ipca.transform(test_x)
        train_x_ipca_list = train_x_ipca.tolist()
        test_x_ipca_list = test_x_ipca.tolist()
        
        print("train_x_ipca_list length = %s" % len(train_x_ipca_list))
        print("test_x_ipca_list length = %s" % len(test_x_ipca_list))
        
        # write output to CSV files
        output_file = "train_100_%s_%s.csv" % (layer, n)
        output_path = os.path.join(base_dir, output_file)
        print("writing output to %s" % output_file)
        
        with open(output_path, 'w', newline='') as csv_fd:
            csv_writer = csv.writer(csv_fd, dialect='excel')
            for i, row in enumerate(train_x_ipca_list):
                row.insert(0, train_y[i])
                csv_writer.writerow(row)
        print("%d rows written to %s" % (i, output_file))
        
        output_file = "test_%s_%s.csv" % (layer, n)
        output_path = os.path.join(base_dir, output_file)
        print("writing output to %s" % output_file)
        
        with open(output_path, 'w', newline='') as csv_fd:
            csv_writer = csv.writer(csv_fd, dialect='excel')
            for i, row in enumerate(test_x_ipca_list):
                row.insert(0, test_y[i])
                csv_writer.writerow(row)
        print("%d rows written to %s" % (i, output_file))

train len = 100581, test len = 3263
train_x shape = (100580, 1024)
train_y shape = (100580,)
test_x shape = (3262, 1024)
test_y shape = (3262,)
layer 0 sample data:
[0.33668762 0.3285663  0.28408796 ... 0.69999087 0.70125365 0.3082098 ]
0
[0.34795123 0.31026286 0.3655623  ... 0.44990182 0.3236977  0.15886703]
0
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_2.csv
100579 rows written to train_100_0_2.csv
writing output to test_0_2.csv
3261 rows written to test_0_2.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_3.csv
100579 rows written to train_100_0_3.csv
writing output to test_0_3.csv
3261 rows written to test_0_3.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_4.csv
100579 rows written to train_100_0_4.csv
writing output to test_0_4.csv
3261 rows written to test_0_4.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
w

train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_40.csv
100579 rows written to train_100_0_40.csv
writing output to test_0_40.csv
3261 rows written to test_0_40.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_41.csv
100579 rows written to train_100_0_41.csv
writing output to test_0_41.csv
3261 rows written to test_0_41.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_42.csv
100579 rows written to train_100_0_42.csv
writing output to test_0_42.csv
3261 rows written to test_0_42.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_43.csv
100579 rows written to train_100_0_43.csv
writing output to test_0_43.csv
3261 rows written to test_0_43.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_44.csv
100579 rows written to train_100_0_44.csv
writing outp

3261 rows written to test_0_78.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_79.csv
100579 rows written to train_100_0_79.csv
writing output to test_0_79.csv
3261 rows written to test_0_79.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_80.csv
100579 rows written to train_100_0_80.csv
writing output to test_0_80.csv
3261 rows written to test_0_80.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_81.csv
100579 rows written to train_100_0_81.csv
writing output to test_0_81.csv
3261 rows written to test_0_81.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_82.csv
100579 rows written to train_100_0_82.csv
writing output to test_0_82.csv
3261 rows written to test_0_82.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_0_83.csv
100579 rows written

train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_18.csv
100579 rows written to train_100_1_18.csv
writing output to test_1_18.csv
3261 rows written to test_1_18.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_19.csv
100579 rows written to train_100_1_19.csv
writing output to test_1_19.csv
3261 rows written to test_1_19.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_20.csv
100579 rows written to train_100_1_20.csv
writing output to test_1_20.csv
3261 rows written to test_1_20.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_21.csv
100579 rows written to train_100_1_21.csv
writing output to test_1_21.csv
3261 rows written to test_1_21.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_22.csv
100579 rows written to train_100_1_22.csv
writing outp

3261 rows written to test_1_56.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_57.csv
100579 rows written to train_100_1_57.csv
writing output to test_1_57.csv
3261 rows written to test_1_57.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_58.csv
100579 rows written to train_100_1_58.csv
writing output to test_1_58.csv
3261 rows written to test_1_58.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_59.csv
100579 rows written to train_100_1_59.csv
writing output to test_1_59.csv
3261 rows written to test_1_59.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_60.csv
100579 rows written to train_100_1_60.csv
writing output to test_1_60.csv
3261 rows written to test_1_60.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_61.csv
100579 rows written

3261 rows written to test_1_95.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_96.csv
100579 rows written to train_100_1_96.csv
writing output to test_1_96.csv
3261 rows written to test_1_96.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_97.csv
100579 rows written to train_100_1_97.csv
writing output to test_1_97.csv
3261 rows written to test_1_97.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_98.csv
100579 rows written to train_100_1_98.csv
writing output to test_1_98.csv
3261 rows written to test_1_98.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_99.csv
100579 rows written to train_100_1_99.csv
writing output to test_1_99.csv
3261 rows written to test_1_99.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_1_100.csv
100579 rows writte

100579 rows written to train_100_2_34.csv
writing output to test_2_34.csv
3261 rows written to test_2_34.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_35.csv
100579 rows written to train_100_2_35.csv
writing output to test_2_35.csv
3261 rows written to test_2_35.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_36.csv
100579 rows written to train_100_2_36.csv
writing output to test_2_36.csv
3261 rows written to test_2_36.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_37.csv
100579 rows written to train_100_2_37.csv
writing output to test_2_37.csv
3261 rows written to test_2_37.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_38.csv
100579 rows written to train_100_2_38.csv
writing output to test_2_38.csv
3261 rows written to test_2_38.csv
train_x_ipca_list length = 100580
test_x_ipca_l

100579 rows written to train_100_2_73.csv
writing output to test_2_73.csv
3261 rows written to test_2_73.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_74.csv
100579 rows written to train_100_2_74.csv
writing output to test_2_74.csv
3261 rows written to test_2_74.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_75.csv
100579 rows written to train_100_2_75.csv
writing output to test_2_75.csv
3261 rows written to test_2_75.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_76.csv
100579 rows written to train_100_2_76.csv
writing output to test_2_76.csv
3261 rows written to test_2_76.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_2_77.csv
100579 rows written to train_100_2_77.csv
writing output to test_2_77.csv
3261 rows written to test_2_77.csv
train_x_ipca_list length = 100580
test_x_ipca_l

train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_12.csv
100579 rows written to train_100_3_12.csv
writing output to test_3_12.csv
3261 rows written to test_3_12.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_13.csv
100579 rows written to train_100_3_13.csv
writing output to test_3_13.csv
3261 rows written to test_3_13.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_14.csv
100579 rows written to train_100_3_14.csv
writing output to test_3_14.csv
3261 rows written to test_3_14.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_15.csv
100579 rows written to train_100_3_15.csv
writing output to test_3_15.csv
3261 rows written to test_3_15.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_16.csv
100579 rows written to train_100_3_16.csv
writing outp

3261 rows written to test_3_50.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_51.csv
100579 rows written to train_100_3_51.csv
writing output to test_3_51.csv
3261 rows written to test_3_51.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_52.csv
100579 rows written to train_100_3_52.csv
writing output to test_3_52.csv
3261 rows written to test_3_52.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_53.csv
100579 rows written to train_100_3_53.csv
writing output to test_3_53.csv
3261 rows written to test_3_53.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_54.csv
100579 rows written to train_100_3_54.csv
writing output to test_3_54.csv
3261 rows written to test_3_54.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_55.csv
100579 rows written

3261 rows written to test_3_89.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_90.csv
100579 rows written to train_100_3_90.csv
writing output to test_3_90.csv
3261 rows written to test_3_90.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_91.csv
100579 rows written to train_100_3_91.csv
writing output to test_3_91.csv
3261 rows written to test_3_91.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_92.csv
100579 rows written to train_100_3_92.csv
writing output to test_3_92.csv
3261 rows written to test_3_92.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_93.csv
100579 rows written to train_100_3_93.csv
writing output to test_3_93.csv
3261 rows written to test_3_93.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_3_94.csv
100579 rows written

100579 rows written to train_100_4_28.csv
writing output to test_4_28.csv
3261 rows written to test_4_28.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_29.csv
100579 rows written to train_100_4_29.csv
writing output to test_4_29.csv
3261 rows written to test_4_29.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_30.csv
100579 rows written to train_100_4_30.csv
writing output to test_4_30.csv
3261 rows written to test_4_30.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_31.csv
100579 rows written to train_100_4_31.csv
writing output to test_4_31.csv
3261 rows written to test_4_31.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_32.csv
100579 rows written to train_100_4_32.csv
writing output to test_4_32.csv
3261 rows written to test_4_32.csv
train_x_ipca_list length = 100580
test_x_ipca_l

100579 rows written to train_100_4_67.csv
writing output to test_4_67.csv
3261 rows written to test_4_67.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_68.csv
100579 rows written to train_100_4_68.csv
writing output to test_4_68.csv
3261 rows written to test_4_68.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_69.csv
100579 rows written to train_100_4_69.csv
writing output to test_4_69.csv
3261 rows written to test_4_69.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_70.csv
100579 rows written to train_100_4_70.csv
writing output to test_4_70.csv
3261 rows written to test_4_70.csv
train_x_ipca_list length = 100580
test_x_ipca_list length = 3262
writing output to train_100_4_71.csv
100579 rows written to train_100_4_71.csv
writing output to test_4_71.csv
3261 rows written to test_4_71.csv
train_x_ipca_list length = 100580
test_x_ipca_l

In [52]:
import tensorflow as tf
import random

In [53]:
input_train = os.path.join(base_dir, 'train_100_4_100.csv')
input_test = os.path.join(base_dir, 'test_4_100.csv')

train = []
test = []

with open(input_train, newline='') as train_fd:
    reader = csv.reader(train_fd)
    for row in reader:
        train.append(row)

with open(input_test, newline='') as test_fd:
    reader = csv.reader(test_fd)
    for row in reader:
        test.append(row)

random.shuffle(train)

In [74]:
tf.reset_default_graph()
sess = tf.InteractiveSession()

# setup tensorflow model
num_features = len(train[0]) - 1
print("num_features = %s" % num_features)
x = tf.placeholder(tf.float32, [None, num_features])
y_ = tf.placeholder(tf.int32)
y_true = tf.one_hot(y_, 101)

# weights and biases
W = tf.Variable(tf.zeros([num_features, 101]))
b = tf.Variable(tf.zeros([101]))

tf.global_variables_initializer().run()

model = tf.matmul(x, W) + b

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=y_true))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# model evaluation
y_true_class = tf.argmax(y_true, axis=0)
y_pred = tf.nn.softmax(model)
y_pred_class = tf.argmax(y_pred, axis=0)

correct_prediction = tf.equal(y_pred_class, y_true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

num_features = 100


In [79]:
# train the model
epochs = 5
for e in range(0, epochs):
    for i, t in enumerate(train):
        t = [float(i) for i in t]
        X_batch = np.asarray(t[1:])
        X_batch = np.expand_dims(X_batch, 0)
        y_batch = int(t[0])
        sess.run(train_step, feed_dict={x: X_batch, y_: y_batch})
        if i % 10000 == 0:
            print("%s - %s" % (e, i))
    random.shuffle(train)

0 - 0
0 - 10000
0 - 20000
0 - 30000
0 - 40000
0 - 50000
0 - 60000
0 - 70000
0 - 80000
0 - 90000
0 - 100000
1 - 0
1 - 10000
1 - 20000
1 - 30000
1 - 40000
1 - 50000
1 - 60000
1 - 70000
1 - 80000
1 - 90000
1 - 100000
2 - 0
2 - 10000
2 - 20000
2 - 30000
2 - 40000
2 - 50000
2 - 60000
2 - 70000
2 - 80000
2 - 90000
2 - 100000
3 - 0
3 - 10000
3 - 20000
3 - 30000
3 - 40000
3 - 50000
3 - 60000
3 - 70000
3 - 80000
3 - 90000
3 - 100000
4 - 0
4 - 10000
4 - 20000
4 - 30000
4 - 40000
4 - 50000
4 - 60000
4 - 70000
4 - 80000
4 - 90000
4 - 100000


In [80]:
# test the model
cumulative_accuracy = 0.0
for i, t in enumerate(test):
    t = [float(i) for i in t]
    X_batch = np.asarray(t[1:])
    X_batch = np.expand_dims(X_batch, 0)
    y_batch = int(t[0])
    accuracy_out = sess.run(accuracy, feed_dict={x: X_batch, y_: y_batch})
    cumulative_accuracy += accuracy_out
    if i % 1000 == 0:
        print("%s - %s" % (i, accuracy_out))

0 - 1.0
1000 - 0.0
2000 - 0.0
3000 - 0.0


In [81]:
cumulative_accuracy / len(test)

0.011342734518700184