### 데이터사이언스

In [1]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the training and test CSVs; both paths are relative to the current
# working directory.
train_data = pd.read_csv(filepath_or_buffer='DS-main/datas/train.csv')
test_data = pd.read_csv(filepath_or_buffer='DS-main/datas/test.csv')

In [6]:
df = train_data  # plain alias: no copy of the training frame is made

# Column groups, one list per theme. Each list is handed to PCA on its own.
# NOTE(review): 'User', 'Card', 'Card Number' and 'Zipcode' look like
# identifiers rather than measurements -- confirm they should feed PCA.
user_info_cols = [
    'User',
    'Gender',
    'Current Age',
    'Retirement Age',
    'Birth Year',
    'Birth Month',
    'Zipcode',
    'Per Capita Income - Zipcode',
    'Yearly Income',
    'Total Debt',
    'Credit Score',
]

card_info_cols = [
    'Card',
    'Card Brand',
    'Card Type',
    'Card Number',
    'Expires',
    'Has Chip',
    'Credit Limit',
    'Acct Open Date',
    'Year PIN last Changed',
]

transaction_info_cols = [
    'Year',
    'Month',
    'Day',
    'Amount',
    'Merchandise Code',
]

security_fraud_info_cols = [
    'Whether Security Chip is Used',
    'Error Message',
    'Is Fraud?',
]

# Function to apply PCA to a group of columns
def apply_pca(df, columns, n_components=0.95):
    """Standardize the numeric columns of one group and project them with PCA.

    Only the float/int columns among ``columns`` take part; columns of any
    other dtype (e.g. text) are silently left out of the projection.

    Args:
        df: Source DataFrame.
        columns: Labels of the columns that make up the group.
        n_components: Passed straight to ``PCA``. The default 0.95 keeps as
            many components as needed to explain 95% of the variance.

    Returns:
        A ``(pca_df, explained_variance_ratio)`` tuple. ``pca_df`` has one
        column per retained component (``PC1``, ``PC2``, ...) and carries the
        same index as ``df`` so the scores stay aligned with the source rows.

    Raises:
        ValueError: If none of ``columns`` is a float/int column, since there
            would be nothing to scale or decompose.
    """
    numeric = df[columns].select_dtypes(include=[float, int])

    # Fail early with a readable message instead of scikit-learn's generic
    # "0 feature(s)" ValueError when the whole group is non-numeric.
    if numeric.shape[1] == 0:
        raise ValueError(
            f"apply_pca needs at least one numeric (float/int) column; "
            f"none found among {list(columns)}"
        )

    # Standardize first so that no column dominates purely because of scale.
    scaled_data = StandardScaler().fit_transform(numeric)

    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(scaled_data)

    component_names = [f'PC{i}' for i in range(1, pca_result.shape[1] + 1)]
    # Reuse the caller's index: no rows are dropped above, so the lengths match.
    pca_df = pd.DataFrame(pca_result, columns=component_names, index=df.index)
    return pca_df, pca.explained_variance_ratio_

# Function to save the PCA result and explained variance to a txt file
def save_pca_to_txt(pca_df, explained_variance, filename):
    """Write a plain-text PCA report to ``filename``, overwriting it.

    The report is the full string rendering of ``pca_df`` under a
    "PCA Results:" heading, followed by ``str(explained_variance)`` under an
    "Explained Variance Ratio:" heading.

    Args:
        pca_df: DataFrame of component scores; rendered with ``to_string()``.
        explained_variance: Object whose ``str()`` form is written verbatim.
        filename: Path of the text file to create or overwrite.
    """
    def _sections():
        # Produced lazily so each piece is rendered only after the file is
        # open and the previous piece has been handed to the writer.
        yield "PCA Results:\n"
        yield pca_df.to_string()
        yield "\n\nExplained Variance Ratio:\n"
        yield str(explained_variance)

    with open(filename, 'w') as report:
        report.writelines(_sections())

# Apply PCA to each group and save to text files
# Each group is decomposed and written out before the next one starts, so a
# failure in a later group leaves the earlier report files in place.
pca_user_info, var_user_info = apply_pca(df=df, columns=user_info_cols)
save_pca_to_txt(
    pca_df=pca_user_info,
    explained_variance=var_user_info,
    filename='pca_user_info.txt',
)

pca_card_info, var_card_info = apply_pca(df=df, columns=card_info_cols)
save_pca_to_txt(
    pca_df=pca_card_info,
    explained_variance=var_card_info,
    filename='pca_card_info.txt',
)

pca_transaction_info, var_transaction_info = apply_pca(
    df=df, columns=transaction_info_cols
)
save_pca_to_txt(
    pca_df=pca_transaction_info,
    explained_variance=var_transaction_info,
    filename='pca_transaction_info.txt',
)

pca_security_fraud_info, var_security_fraud_info = apply_pca(
    df=df, columns=security_fraud_info_cols
)
save_pca_to_txt(
    pca_df=pca_security_fraud_info,
    explained_variance=var_security_fraud_info,
    filename='pca_security_fraud_info.txt',
)

print("PCA results saved to text files.")

PCA - User Info:         PC1       PC2       PC3       PC4       PC5      PC6       PC7  \
0  1.547224  1.186552 -0.484155 -1.981151  1.525804  0.88928 -0.359976   
1  1.547224  1.186552 -0.484155 -1.981151  1.525804  0.88928 -0.359976   
2  1.547224  1.186552 -0.484155 -1.981151  1.525804  0.88928 -0.359976   
3  1.547224  1.186552 -0.484155 -1.981151  1.525804  0.88928 -0.359976   
4  1.547224  1.186552 -0.484155 -1.981151  1.525804  0.88928 -0.359976   

        PC8       PC9  
0  0.716238 -0.954456  
1  0.716238 -0.954456  
2  0.716238 -0.954456  
3  0.716238 -0.954456  
4  0.716238 -0.954456  
PCA - Card Info:         PC1       PC2       PC3      PC4
0  0.233724  0.511934 -1.659739  0.24869
1  0.233724  0.511934 -1.659739  0.24869
2  0.233724  0.511934 -1.659739  0.24869
3  0.233724  0.511934 -1.659739  0.24869
4  0.233724  0.511934 -1.659739  0.24869
PCA - Transaction Info:         PC1       PC2       PC3       PC4
0  0.475629 -2.232074  0.004659  1.082067
1  0.721707 -1.998653 -