In [1]:
# To turn off warning messages
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

In [4]:
# Read the credit card transactions CSV (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='creditcard.csv')

In [5]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


## Principal Component Analysis (PCA) 
Principal Component Analysis (PCA) is a dimensionality reduction technique used in machine learning and data analysis to transform high-dimensional data into a smaller set of uncorrelated variables called principal components while preserving as much variance as possible.

In the credit card fraud dataset, features V1 to V28 are the principal components extracted from the original features through PCA. These transformed features represent a lower-dimensional space while still retaining the essential information required for fraud detection.

In [8]:
# Dimensions of the loaded frame as a (rows, columns) tuple
df.shape

(284807, 31)

In [9]:
# Print a per-column summary: column name, non-null count and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [10]:
# Count missing values in each column
df.isna().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [7]:
# Build a data dictionary (feature name -> description) for the columns of
# `df` and save it as a CSV file.

# Ordinal words for V1..V28, in order. Every PCA feature shares the same
# description template, so the entries are generated from this list instead of
# being typed out 28 times (hand-editing the repeated strings is what garbled
# the "Time", "V1" and "V16" descriptions previously).
pca_ordinals = [
    "First", "Second", "Third", "Fourth", "Fifth", "Sixth", "Seventh",
    "Eighth", "Ninth", "Tenth", "Eleventh", "Twelfth", "Thirteenth",
    "Fourteenth", "Fifteenth", "Sixteenth", "Seventeenth", "Eighteenth",
    "Nineteenth", "Twentieth", "Twenty-first", "Twenty-second",
    "Twenty-third", "Twenty-fourth", "Twenty-fifth", "Twenty-sixth",
    "Twenty-seventh", "Twenty-eighth",
]

# Define feature descriptions
descriptions = {
    "Time": "Seconds elapsed between each transaction and the first transaction in the dataset.",
    # "V1" .. "V28": one entry per ordinal, numbered from 1
    **{
        f"V{position}": f"{ordinal} principal component derived from PCA transformation of original features."
        for position, ordinal in enumerate(pca_ordinals, start=1)
    },
    "Amount": "Transaction amount in monetary value.",
    "Class": "Target variable: 0 for legitimate transactions, 1 for fraudulent transactions."
}

# Create a data dictionary template: one row per column of `df`, in column order
data_dict = pd.DataFrame({
    "Feature": df.columns,
    "Description": [descriptions.get(col, "Unknown feature") for col in df.columns]  # Default description for unknown columns
})

# Save to CSV (index=False keeps the row index out of the file)
data_dict.to_csv("data_dictionary.csv", index=False)

print("Data dictionary CSV file has been created successfully.")


Data dictionary CSV file has been created successfully.
