In [None]:
# i=[]
# while(True):
#   i.append("a")

In [None]:
import numpy as np
import os
import csv
import random

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, GRU, Dense, Input, Dropout

In [None]:
# set manual seed for reproducibility
seed = 33

# general reproducibility: seed every random generator the notebook draws from
random.seed(seed)
np.random.seed(seed)
# Keras weight initialisation and Dropout use TensorFlow's own generator,
# which the two calls above do not touch.
tf.random.set_seed(seed)

In [None]:
#constant
num_lable_kinds = 5  # width of one label row; split_label_score also uses it as the number of score groups
both_cols = 6  # feature columns kept per time step (data_cut selects six input columns)
high_rows = 60  # time steps in one "high" sample
short_rows = 50  # time steps in one "short" sample

output_dim = 5  # units of the final Dense layer of every model

In [None]:
#numpy to store data
# Empty accumulators that the preprocessing loop extends with np.concatenate.
# Sample arrays are (n_samples, rows, both_cols); label arrays are (n_samples, num_lable_kinds).
high_data = np.empty((0, high_rows, both_cols), dtype=float)
high_label = np.empty((0, num_lable_kinds), dtype=int)
short_data = np.empty((0, short_rows, both_cols), dtype=float)
short_label = np.empty((0, num_lable_kinds), dtype=int)

# predefine

## Preprocessing functions

### Functions for finding a hit

In [None]:
def slope(input_data):
    """Return the step-to-step change magnitude of columns 1..3.

    For every row ``i`` except the last, the result holds the Euclidean norm of
    ``input_data[i+1][1:4] - input_data[i][1:4]``.  The last entry has no
    successor and stays 0.

    Args:
        input_data: 2-D array-like with at least four columns.

    Returns:
        1-D float array with one entry per input row.
    """
    data = np.asarray(input_data, dtype=float)
    slope_col = np.zeros(len(data), dtype=float)
    # With fewer than two rows there is no difference to take.
    if len(data) > 1:
        step = np.diff(data[:, 1:4], axis=0)
        slope_col[:-1] = np.sqrt((step ** 2).sum(axis=1))
    return slope_col

def ma(slope_col, n):
    """Centered moving average with a window of ``2*n + 1`` samples.

    Positions closer than ``n`` to either end do not have a full window and
    are left at 0.

    Args:
        slope_col: 1-D array-like of values to smooth.
        n: half-width of the averaging window.

    Returns:
        1-D float array of the same length as ``slope_col``.
    """
    values = np.asarray(slope_col, dtype=float)
    window = 2 * n + 1
    slope_ma = np.zeros(len(values), dtype=float)
    # np.convolve(mode="valid") only yields the full-window positions; it must not
    # be called when the input is shorter than the window, because it would then
    # swap its arguments and return the wrong length.
    if len(values) >= window:
        slope_ma[n:len(values) - n] = np.convolve(values, np.ones(window), mode="valid") / window
    return slope_ma

def data_cut(input_data, save_data, hit_type):
    """Locate hits in a recording and append a fixed-length window for each.

    A row ``i`` is a hit candidate when its smoothed slope is above the mean
    slope of the whole recording and its raw slope is the maximum of
    ``slope_col[i-50:i+50]``.  From the candidate the smoothed slope is followed
    backward and forward until it drops to the mean (or the search bound is
    reached); the candidate is kept when that span is long enough and the
    smoothed slope at the peak exceeds 15.

    Saved windows end 15 rows after the peak and keep columns 1, 2, 3, 5, 6, 7:
    60 rows (``i-45:i+15``) for "high", 50 rows (``i-35:i+15``) for "short".

    Args:
        input_data: 2-D numpy array with at least eight columns.
        save_data: list that receives the extracted windows (mutated in place).
        hit_type: "high" or "short".

    Raises:
        ValueError: if ``hit_type`` is neither "high" nor "short".
    """
    # n: smoothing half-width, min_span: required length of the above-average
    # run, lead: rows kept before the peak.
    if hit_type == "high":
        n, min_span, lead = 6, 40, 45
    elif hit_type == "short":
        n, min_span, lead = 5, 20, 35
    else:
        # Previously this fell through and failed later with an unbound `n`.
        raise ValueError("hit_type must be 'high' or 'short', got %r" % (hit_type,))

    slope_col = slope(input_data)
    slope_avg = np.average(slope_col)
    slope_ma = ma(slope_col, n)

    for i in range(60, len(slope_col) - 50):

        # Candidate peak: above-average smoothed slope and a local maximum of the raw slope.
        is_peak = slope_ma[i] > slope_avg and slope_col[i] == slope_col[i-50:i+50].max()
        if not is_peak:
            continue

        # Walk backward and forward from the peak to where the smoothed slope
        # falls back to the average.
        start = 0
        end = 0
        while i + start > (50 + n):
            start -= 1
            if slope_ma[i + start] <= slope_avg:
                break
        while i + end < (len(slope_col) - 50 - n):
            end += 1
            if slope_ma[i + end] <= slope_avg:
                break

        # "high" is a long clear shot, "short" is a lift shot; both need a strong peak.
        if end - start > min_span and slope_ma[i] > 15:
            save_data.append(input_data[i-lead:i+15, [1,2,3,5,6,7]])

### Other functions

In [None]:
# Removes null bytes from the input file and returns a sanitized version of the file.
def sanitize_file(input_file_path):
  """Read a text file as UTF-8 and return its content with NUL characters removed.

  Undecodable bytes are replaced rather than raising (``errors='replace'``).
  """
  with open(input_file_path, 'r', encoding='utf-8', errors='replace') as handle:
    return handle.read().replace('\x00', '')

In [None]:
def hit_preprocess(input_file_path, hit_type):
  """Load one recording, cut out its hits and build one label row per hit.

  The label digits are characters 6..10 of the file's base name, one integer
  per character.

  Args:
    input_file_path: path of the comma-separated recording.
    hit_type: "high" or "short"; forwarded to ``data_cut``.

  Returns:
    ``(cut_data, datas_label)``: a float array of hit windows and an int array
    holding the file's label row repeated once per window.  A file without hits
    yields arrays with zero rows that still concatenate with the accumulators.
  """
  # Sanitize the file by removing null bytes
  lines = sanitize_file(input_file_path).split('\n')

  # Starting from the last line, move upwards until a complete line (with 7 commas) is found
  while lines and lines[-1].count(",") != 7:
    lines.pop()

  # ndmin=2 keeps a single-row file two-dimensional so row/column indexing still works.
  input_data = np.loadtxt(lines, delimiter=",", dtype=float, ndmin=2)

  cut_data = []
  data_cut(input_data, cut_data, hit_type)
  if cut_data:
    cut_data = np.array(cut_data).astype(float)
  else:
    # np.array([]) would have shape (0,), which cannot be concatenated with the
    # 3-D accumulators; build an empty array of the expected sample shape instead.
    rows = high_rows if hit_type == "high" else short_rows
    cut_data = np.empty((0, rows, both_cols), dtype=float)

  # Derive the name from the argument instead of relying on the caller's global
  # loop variable `input_filename`.
  file_name = os.path.basename(input_file_path)
  label_values = np.array([int(ch) for ch in file_name[6:11]])  # convert to an integer array
  # Repeat the label row once per extracted window.
  datas_label = np.tile(label_values, (len(cut_data), 1))

  return cut_data,datas_label

## preprocess

In [None]:
# Path of the directory that holds the dataset
data_path = "/content/"
# List every file name under that path
all_files = os.listdir(data_path)
# Keep only the .txt files
txt_files = [file_name for file_name in all_files if file_name.endswith('.txt')]

for input_filename in txt_files:

  # The stroke type is read from the file name; files naming neither type are skipped.
  if "high" in input_filename.lower():
    hit_type = "high"
  elif "short" in input_filename.lower():
    hit_type = "short"
  else:
    print("error txt-file")
    continue

  input_file_path = os.path.join(data_path, input_filename)
  data, label = hit_preprocess(input_file_path, hit_type)

  # Append this file's samples and labels to the accumulator of its stroke type.
  if hit_type == "high":
    high_data = np.concatenate((high_data, data), axis=0)
    high_label = np.concatenate((high_label, label))
  elif hit_type == "short":
    short_data = np.concatenate((short_data, data), axis=0)
    short_label = np.concatenate((short_label, label))

  # NOTE(review): the source file is deleted once processed, so this cell cannot be
  # re-run on the same inputs — presumably to keep a re-run from appending duplicates; confirm.
  os.remove(input_file_path)

## Functions to reorder and split

### shuffle, shuffle_direct

In [None]:
#input:data and label, output:numpy with same shape, but shuffled
#process: shuffle (data,label)
def shuffle(datas,labels):
  """Return shuffled copies of ``datas`` and ``labels`` using one shared random order.

  Prints "error" and returns None when the two inputs differ in length.
  The order comes from ``np.random.shuffle`` (global NumPy RNG).
  """
  count = len(datas)
  if count != len(labels):
    print("error")
    return

  # One permutation of the positions, applied to both inputs so pairs stay aligned.
  order = np.arange(count)
  np.random.shuffle(order)

  picked_datas = [datas[pos] for pos in order]
  picked_labels = [labels[pos] for pos in order]
  return np.array(picked_datas), np.array(picked_labels)

In [None]:
#direct change input's data and label
def shuffle_direct(datas,labels):
  """Shuffle ``datas`` and ``labels`` in place using one shared random order.

  Prints "error" and returns without changes when the lengths differ.
  Nothing is returned; the inputs themselves are overwritten.
  """
  count = len(datas)
  if count != len(labels):
    print("error")
    return

  # One permutation of the positions, applied to both inputs so pairs stay aligned.
  order = np.arange(count)
  np.random.shuffle(order)

  # Build complete reordered copies first; only then overwrite the inputs.
  reordered_datas = np.array([datas[pos] for pos in order])
  reordered_labels = np.array([labels[pos] for pos in order])

  datas[:] = reordered_datas
  labels[:] = reordered_labels

### sort, sort_direct

In [None]:
#input:data and label, output:numpy with same shape, but sorted
#process: sort (data,label)
def sort(datas,labels):
  """Return copies of ``datas`` and ``labels`` ordered by ascending label-row sum.

  Ties keep their original relative order (stable sort).  Prints "error" and
  returns None when the two inputs differ in length.
  """
  if len(datas) != len(labels):
    print("error")
    return

  # Rank the samples by the sum of each label row.
  ranking = np.argsort(labels.sum(axis=1), kind='stable')

  ordered_datas = [datas[pos] for pos in ranking]
  ordered_labels = [labels[pos] for pos in ranking]
  return np.array(ordered_datas), np.array(ordered_labels)

In [None]:
#direct change input's data and label
def sort_direct(datas,labels):
  """Reorder ``datas`` and ``labels`` in place by ascending label-row sum.

  Ties keep their original relative order (stable sort).  Prints "error" and
  returns without changes when the lengths differ.  Nothing is returned.
  """
  if len(datas) != len(labels):
    print("error")
    return

  # Rank the samples by the sum of each label row.
  ranking = np.argsort(labels.sum(axis=1), kind='stable')

  # Build complete reordered copies first; only then overwrite the inputs.
  ordered_datas = np.array([datas[pos] for pos in ranking])
  ordered_labels = np.array([labels[pos] for pos in ranking])

  datas[:] = ordered_datas
  labels[:] = ordered_labels

### split_percentage, split_equal_to_n

In [None]:
#input: data, label and percentage of test
#output: numpy, (train_data, train_label, test_data, test_label)
def split_percentage(datas,labels,test_percent):
  """Split into a leading training part and a trailing test part.

  The training part holds the first ``round(len(datas) * (1 - test_percent))``
  items; everything after that is the test part.  No shuffling is done here.

  Returns:
    ``(train_data, train_label, test_data, test_label)``.
  """
  boundary = round(len(datas) * (1 - test_percent))
  return datas[:boundary], labels[:boundary], datas[boundary:], labels[boundary:]

In [None]:
#input: data, label and number of client
#output: numpy, [ data1 data2 ...], data1 = output[0]
def split_equal_to_n(datas,labels,n_Clients):
  """Cut ``datas`` and ``labels`` into ``n_Clients`` consecutive, equally sized parts.

  Each part holds ``len(datas) // n_Clients`` items; leftover items at the end
  are dropped.

  Returns:
    Two arrays whose first axis indexes the client.
  """
  chunk = len(datas) // n_Clients
  bounds = [(k * chunk, (k + 1) * chunk) for k in range(n_Clients)]

  data_parts = [datas[lo:hi] for lo, hi in bounds]
  label_parts = [labels[lo:hi] for lo, hi in bounds]
  return np.array(data_parts), np.array(label_parts)

## Functions to average weights and split by score

### average_weight

In [None]:
def average_weight(models):
  """Average the weights of several models tensor by tensor.

  Args:
    models: objects exposing ``get_weights()`` that return equally shaped
      lists of arrays.

  Returns:
    List with one array per weight tensor: the element-wise mean over all
    models.  An empty ``models`` yields an empty list.
  """
  per_model_weights = [model.get_weights() for model in models]

  # zip(*...) groups the i-th tensor of every model; a single mean over the
  # model axis replaces the former row-by-row Python loop and also works for
  # 0-d (scalar) weights, which cannot be iterated.
  return [
      np.asarray(np.mean(same_tensor, axis=0))
      for same_tensor in zip(*per_model_weights)
  ]

### split_label_score

In [None]:
def split_label_score(datas,labels):
  """Group samples by the rounded mean of their label row.

  Group ``k`` (0-based) holds the samples whose rounded mean equals ``k + 1``,
  for ``k`` in ``0 .. num_lable_kinds - 1``.  Within a group the samples are
  ordered by ascending label sum, ties in input order.  Rounded means outside
  ``1 .. num_lable_kinds`` are clipped into the nearest group so no sample is
  lost.  ``np.round`` rounds halves to the nearest even integer.

  Args:
    datas: array-like of samples, first axis indexes the sample.
    labels: 2-D array-like, one label row per sample.

  Returns:
    ``(client_data, client_label)``: two 1-D object arrays of length
    ``num_lable_kinds`` whose elements are the per-group arrays (possibly with
    zero rows).  Prints "error" and returns None when the lengths differ.
  """
  if len(datas) != len(labels):
    print("error")
    return

  datas = np.asarray(datas)
  labels = np.asarray(labels)

  # Stable ordering by label sum, the same ordering sort() produces.
  order = np.argsort(labels.sum(axis=1), kind='stable')
  stdt = datas[order]
  stlb = labels[order]

  scores = np.clip(np.round(np.mean(stlb, axis=1)), 1, num_lable_kinds)

  # Groups generally differ in size, so they are stored in object arrays;
  # np.array() over ragged lists raises on current NumPy versions.
  client_data = np.empty(num_lable_kinds, dtype=object)
  client_label = np.empty(num_lable_kinds, dtype=object)

  # Select each score level explicitly.  The former running counter shifted
  # samples into the wrong group whenever a score level had no samples.
  for k in range(num_lable_kinds):
    members = scores == (k + 1)
    client_data[k] = stdt[members]
    client_label[k] = stlb[members]

  return client_data, client_label

# models

### output, loss, accuracy

In [None]:
# Define custom output function
def custom_output(x):
  """Return ``x`` unchanged; identity placeholder applied to each model's Dense output."""
  # Implement your custom output logic here
  return x

def custom_loss(y_true, y_pred):
  """Mean squared error between ``y_true`` (cast to float32) and ``y_pred``."""
  # Cast y_true to float32 before subtracting
  residual = tf.cast(y_true, dtype=tf.float32) - y_pred
  return tf.reduce_mean(tf.square(residual))

# Define custom accuracy function
threshold = 0.5
def custom_accuracy(y_true, y_pred):
  """Element-wise hit indicator: 1.0 where ``|y_true - y_pred| <= threshold``, else 0.0.

  Reads the module-level ``threshold``.  The result has the shape of the
  inputs and is not averaged here.
  """
  # Cast the labels to the prediction dtype, as custom_loss does, so integer
  # labels do not cause a dtype mismatch in the subtraction.
  y_true = tf.cast(y_true, y_pred.dtype)
  within_threshold = tf.less_equal(tf.abs(y_true - y_pred), threshold)
  return tf.cast(within_threshold, tf.float32)

### LSTM_mymodel

In [None]:
class LSTM_mymodel(Model):
    """Stack of ``num_layers`` LSTM layers followed by a Dense layer of ``output_dim`` units."""

    def __init__(self, units, output_dim, num_layers):
        super().__init__()
        last_index = num_layers - 1
        # Every layer except the last returns the full sequence so the next
        # recurrent layer can consume it.
        self.lstm_layers = [
            LSTM(units, return_sequences=(depth < last_index))
            for depth in range(num_layers)
        ]
        self.dense = Dense(output_dim)

    def call(self, inputs):
        """Run the inputs through the LSTM stack, the Dense layer and ``custom_output``."""
        hidden = inputs
        for recurrent in self.lstm_layers:
            hidden = recurrent(hidden)
        return custom_output(self.dense(hidden))

### LSTMdrop_mymodel

In [None]:
class LSTMdrop_mymodel(Model):
    """LSTM stack with one shared Dropout layer applied after every LSTM, then a Dense layer.

    ``learning_rate`` is accepted but not used by this class.
    """

    def __init__(self, units, output_dim, num_layers, dropout_rate, learning_rate=0.01):
        super().__init__()
        last_index = num_layers - 1
        # Every layer except the last returns the full sequence so the next
        # recurrent layer can consume it.
        self.lstm_layers = [
            LSTM(units, return_sequences=(depth < last_index))
            for depth in range(num_layers)
        ]
        self.dropout = Dropout(rate=dropout_rate)
        self.dense = Dense(output_dim)

    def call(self, inputs):
        """Run the inputs through LSTM+dropout pairs, the Dense layer and ``custom_output``."""
        hidden = inputs
        for recurrent in self.lstm_layers:
            # Apply dropout after each LSTM layer
            hidden = self.dropout(recurrent(hidden))
        return custom_output(self.dense(hidden))

### GRU_mymodel

In [None]:
class GRU_mymodel(Model):
    """Stack of ``num_layers`` GRU layers followed by a Dense layer of ``output_dim`` units."""

    def __init__(self, units, output_dim, num_layers):
        super().__init__()
        last_index = num_layers - 1
        # Every layer except the last returns the full sequence so the next
        # recurrent layer can consume it.
        self.gru_layers = [
            GRU(units, return_sequences=(depth < last_index))
            for depth in range(num_layers)
        ]
        self.dense = Dense(output_dim)

    def call(self, inputs):
        """Run the inputs through the GRU stack, the Dense layer and ``custom_output``."""
        hidden = inputs
        for recurrent in self.gru_layers:
            hidden = recurrent(hidden)
        return custom_output(self.dense(hidden))

### GRUdrop_mymodel

In [None]:
class GRUdrop_mymodel(Model):
    """GRU stack with one shared Dropout layer applied after every GRU, then a Dense layer."""

    def __init__(self, units, output_dim, num_layers, dropout_rate):
        super().__init__()
        last_index = num_layers - 1
        # Every layer except the last returns the full sequence so the next
        # recurrent layer can consume it.
        self.gru_layers = [
            GRU(units, return_sequences=(depth < last_index))
            for depth in range(num_layers)
        ]
        self.dropout = Dropout(rate=dropout_rate)
        self.dense = Dense(output_dim)

    def call(self, inputs):
        """Run the inputs through GRU+dropout pairs, the Dense layer and ``custom_output``."""
        hidden = inputs
        for recurrent in self.gru_layers:
            # Apply dropout after each GRU layer
            hidden = self.dropout(recurrent(hidden))
        return custom_output(self.dense(hidden))

# Test functions

### runiidLSTM

In [None]:
def runiidLSTM(datas,labels,test_data,test_label,num_batch):
  """Federated-averaging run with ``num_model`` LSTM clients on the given shards.

  In round ``j`` client ``i`` fits on shard ``i + j*num_model``; after every
  round all clients receive the averaged weights.  The first client is then
  evaluated on the test set.  Reads the globals ``num_model``, ``num_round``,
  ``num_layers``, ``output_dim`` and ``Epochs``.

  Args:
    datas, labels: per-shard training data and labels, indexed by shard.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  iid_LSTM_models = [
      LSTM_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(num_model)
  ]

  for model in iid_LSTM_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):
    for i, model in enumerate(iid_LSTM_models):
      shard = i + j*num_model
      model.fit(datas[shard], labels[shard],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise all clients on the averaged weights before the next round.
    avg_weight = average_weight(iid_LSTM_models)

    for model in iid_LSTM_models:
      model.set_weights(avg_weight)

  print("\nLSTM")
  iid_LSTM_loss, iid_LSTM_accuracy = iid_LSTM_models[0].evaluate(test_data, test_label)
  return iid_LSTM_loss, iid_LSTM_accuracy

### runsortLSTM

In [None]:
def runsortLSTM(datas,labels,test_data,test_label,num_batch):
  """Federated-averaging run with ``num_model`` LSTM clients on sorted shards.

  In round ``j`` client ``i`` fits on shard ``i*num_round + j``, so each client
  keeps a consecutive run of shards.  After every round all clients receive
  the averaged weights; the first client is then evaluated on the test set.
  Reads the globals ``num_model``, ``num_round``, ``num_layers``,
  ``output_dim`` and ``Epochs``.

  Args:
    datas, labels: per-shard training data and labels, indexed by shard.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  sort_LSTM_models = [
      LSTM_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(num_model)
  ]

  for model in sort_LSTM_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):
    for i, model in enumerate(sort_LSTM_models):
      shard = i*num_round + j
      model.fit(datas[shard], labels[shard],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise all clients on the averaged weights before the next round.
    avg_weight = average_weight(sort_LSTM_models)

    for model in sort_LSTM_models:
      model.set_weights(avg_weight)

  print("\nLSTM")
  sort_LSTM_loss, sort_LSTM_accuracy = sort_LSTM_models[0].evaluate(test_data, test_label)
  return sort_LSTM_loss, sort_LSTM_accuracy

### runscoreLSTM

In [None]:
def runscoreLSTM(datas_12,labels_12,datas_345,labels_345,test_data,test_label,num_batch):
  """Federated-averaging run with two LSTM clients split by score range.

  In round ``j`` the first client fits on ``datas_12[j]`` and the second on
  ``datas_345[j]``; after every round both receive the averaged weights.  The
  first client is then evaluated on the test set.  Reads the globals
  ``num_round``, ``num_layers``, ``output_dim`` and ``Epochs``.

  Args:
    datas_12, labels_12: per-round shards for the first client.
    datas_345, labels_345: per-round shards for the second client.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  score_LSTM_models = [
      LSTM_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(2)
  ]

  for model in score_LSTM_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):

    score_LSTM_models[0].fit(datas_12[j],labels_12[j],epochs=Epochs, batch_size=num_batch, verbose=0)
    score_LSTM_models[1].fit(datas_345[j],labels_345[j],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise both clients on the averaged weights before the next round.
    avg_weight = average_weight(score_LSTM_models)

    for model in score_LSTM_models:
      model.set_weights(avg_weight)

  print("\nLSTM")
  score_LSTM_loss, score_LSTM_accuracy = score_LSTM_models[0].evaluate(test_data, test_label)
  return score_LSTM_loss, score_LSTM_accuracy

### runiidGRU

In [None]:
def runiidGRU(datas,labels,test_data,test_label,num_batch):
  """Federated-averaging run with ``num_model`` GRU clients on the given shards.

  In round ``j`` client ``i`` fits on shard ``i + j*num_model``; after every
  round all clients receive the averaged weights.  The first client is then
  evaluated on the test set.  Reads the globals ``num_model``, ``num_round``,
  ``num_layers``, ``output_dim`` and ``Epochs``.

  Args:
    datas, labels: per-shard training data and labels, indexed by shard.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  iid_GRU_models = [
      GRU_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(num_model)
  ]

  for model in iid_GRU_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):
    for i, model in enumerate(iid_GRU_models):
      shard = i + j*num_model
      model.fit(datas[shard], labels[shard],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise all clients on the averaged weights before the next round.
    avg_weight = average_weight(iid_GRU_models)

    for model in iid_GRU_models:
      model.set_weights(avg_weight)

  print("\nGRU")
  iid_GRU_loss, iid_GRU_accuracy = iid_GRU_models[0].evaluate(test_data, test_label)
  return iid_GRU_loss, iid_GRU_accuracy

### runsortGRU

In [None]:
def runsortGRU(datas,labels,test_data,test_label,num_batch):
  """Federated-averaging run with ``num_model`` GRU clients on sorted shards.

  In round ``j`` client ``i`` fits on shard ``i*num_round + j``, so each client
  keeps a consecutive run of shards.  After every round all clients receive
  the averaged weights; the first client is then evaluated on the test set.
  Reads the globals ``num_model``, ``num_round``, ``num_layers``,
  ``output_dim`` and ``Epochs``.

  Args:
    datas, labels: per-shard training data and labels, indexed by shard.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  sort_GRU_models = [
      GRU_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(num_model)
  ]

  for model in sort_GRU_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):
    for i, model in enumerate(sort_GRU_models):
      shard = i*num_round + j
      model.fit(datas[shard], labels[shard],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise all clients on the averaged weights before the next round.
    avg_weight = average_weight(sort_GRU_models)

    for model in sort_GRU_models:
      model.set_weights(avg_weight)

  print("\nGRU")
  sort_GRU_loss, sort_GRU_accuracy = sort_GRU_models[0].evaluate(test_data, test_label)
  return sort_GRU_loss, sort_GRU_accuracy

### runscoreGRU

In [None]:
def runscoreGRU(datas_12,labels_12,datas_345,labels_345,test_data,test_label,num_batch):
  """Federated-averaging run with two GRU clients split by score range.

  In round ``j`` the first client fits on ``datas_12[j]`` and the second on
  ``datas_345[j]``; after every round both receive the averaged weights.  The
  first client is then evaluated on the test set.  Reads the globals
  ``num_round``, ``num_layers``, ``output_dim`` and ``Epochs``.

  Args:
    datas_12, labels_12: per-round shards for the first client.
    datas_345, labels_345: per-round shards for the second client.
    test_data, test_label: held-out evaluation set.
    num_batch: batch size passed to ``fit``.

  Returns:
    ``(loss, accuracy)`` of the averaged model on the test set (previously the
    values were computed and discarded).
  """
  score_GRU_models = [
      GRU_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
      for _ in range(2)
  ]

  for model in score_GRU_models:
    model.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])

  for j in range(num_round):

    score_GRU_models[0].fit(datas_12[j],labels_12[j],epochs=Epochs, batch_size=num_batch, verbose=0)
    score_GRU_models[1].fit(datas_345[j],labels_345[j],epochs=Epochs, batch_size=num_batch, verbose=0)

    # Synchronise both clients on the averaged weights before the next round.
    avg_weight = average_weight(score_GRU_models)

    for model in score_GRU_models:
      model.set_weights(avg_weight)

  print("\nGRU")
  score_GRU_loss, score_GRU_accuracy = score_GRU_models[0].evaluate(test_data, test_label)
  return score_GRU_loss, score_GRU_accuracy

# test

In [None]:
# constant
test_percent = 0.1  # fraction of the samples held out as the test set
num_layers = 1  # recurrent layers per model
Dropout_Rate = 0.2  # not referenced by any cell visible in this notebook

Epochs = 30  # epochs per fit() call
# Batch_Size = 32
# constant for fedavg
num_model = 3  # clients per federated run
num_round = 2  # averaging rounds per federated run
# constant
num_run = 3  # repetitions of every experiment per batch size

Batch = [10,16,20,24,32,40]  # batch sizes swept by the experiment cells


In [None]:
print(high_data.shape)
# Shuffle the "high" samples in place, then hold out the last test_percent as the test set.
shuffle_direct(high_data, high_label)
# split_percentage slices its inputs, so train/test are views of high_data / high_label
# and any later in-place change to them also changes the source arrays.
train_data, train_label, test_data, test_label = split_percentage(high_data,high_label,test_percent)
print(train_data.shape)

(848, 60, 6)
(763, 60, 6)


## Centralize

In [None]:
# Centralized baseline: train one LSTM and one GRU on the full training set
# for every batch size, repeated num_run times.
# Iterate over Batch itself so the sweep stays correct if the list changes length.
for Batch_Size in Batch:

  print("\nbatchsize : ",Batch_Size)

  for _ in range(num_run):
    cen_LSTM = LSTM_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
    cen_LSTM.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])
    cen_LSTM.fit(train_data, train_label, epochs=Epochs, batch_size=Batch_Size, verbose=0)
    print("\nLSTM")
    cen_LSTM_loss, cen_LSTM_accuracy = cen_LSTM.evaluate(test_data, test_label)

    cen_GRU = GRU_mymodel(units=64, output_dim=output_dim, num_layers=num_layers)
    cen_GRU.compile(optimizer='adam', loss=custom_loss, metrics=[custom_accuracy])
    cen_GRU.fit(train_data, train_label, epochs=Epochs, batch_size=Batch_Size, verbose=0)
    print("\nGRU")
    cen_GRU_loss, cen_GRU_accuracy = cen_GRU.evaluate(test_data, test_label)

## IID run

In [None]:
# divide data
# One equally sized shard per (client, round) pair, taken in order from the shuffled training set.
hi_iid_datas, hi_iid_labels = split_equal_to_n(train_data, train_label, num_model*num_round)

In [None]:
# Iterate over Batch itself so the sweep stays correct if the list changes length.
for Batch_Size in Batch:
  print("\nbatchsize : ",Batch_Size)
  for _ in range(num_run):
    runiidLSTM(hi_iid_datas,hi_iid_labels,test_data,test_label,Batch_Size)
    runiidGRU(hi_iid_datas,hi_iid_labels,test_data,test_label,Batch_Size)

## sort run

In [None]:
# divide data
# Sort into new arrays instead of sorting train_data in place: train_data is a
# view of high_data, and mutating it here would silently change what earlier
# cells see when they are re-run.  The resulting shards are the same.
sorted_train_data, sorted_train_label = sort(train_data, train_label)
hi_sort_datas, hi_sort_labels = split_equal_to_n(sorted_train_data, sorted_train_label, num_model*num_round)

In [None]:
# Iterate over Batch itself so the sweep stays correct if the list changes length.
for Batch_Size in Batch:
  print("\nbatchsize : ",Batch_Size)
  for _ in range(num_run):
    runsortLSTM(hi_sort_datas,hi_sort_labels,test_data,test_label,Batch_Size)
    runsortGRU(hi_sort_datas,hi_sort_labels,test_data,test_label,Batch_Size)

## score run

In [None]:
# Group the training samples by score; indices 0..4 are the five groups
# returned by split_label_score.
hi_score_datas, hi_score_labels = split_label_score(train_data, train_label)

# First client: groups 0 and 1.  Second client: groups 2, 3 and 4.
data_12 = np.concatenate((hi_score_datas[0], hi_score_datas[1]), axis=0)
label_12 = np.concatenate((hi_score_labels[0], hi_score_labels[1]), axis=0)

data_345 = np.concatenate((hi_score_datas[2], hi_score_datas[3], hi_score_datas[4]), axis=0)
label_345 = np.concatenate((hi_score_labels[2], hi_score_labels[3], hi_score_labels[4]), axis=0)

# The concatenated arrays are fresh copies, so shuffling them in place does
# not touch the training set.
shuffle_direct(data_12,label_12)
shuffle_direct(data_345,label_345)

# One shard per round for each client.
hi_12_datas, hi_12_labels = split_equal_to_n(data_12, label_12, num_round)
hi_345_datas, hi_345_labels = split_equal_to_n(data_345, label_345, num_round)

# The stray indented `return ...` that ended this cell was a leftover paste
# and made the whole cell a syntax error; it has been removed.


In [None]:
# Iterate over Batch itself so the sweep stays correct if the list changes length.
for Batch_Size in Batch:
  print("\nbatchsize : ",Batch_Size)
  for _ in range(num_run):
    runscoreLSTM(hi_12_datas,hi_12_labels,hi_345_datas,hi_345_labels,test_data,test_label,Batch_Size)
    runscoreGRU(hi_12_datas,hi_12_labels,hi_345_datas,hi_345_labels,test_data,test_label,Batch_Size)

# end