In [15]:
# Import library
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [16]:
# Read the 'Rekapan' worksheet of the Excel workbook into the working DataFrame
df = pd.read_excel(io='data.xlsx', sheet_name='Rekapan')

In [None]:
# Show the first and the last rows of the data (head/tail default row count),
# emitted as two newline-separated text tables
print(df.head(), df.tail(), sep='\n')

In [None]:
# Basic descriptive statistics as reported by DataFrame.describe()
print(df.describe())

# Number of missing entries in each column
print(df.isna().sum())

# Distribution of the data: histograms drawn by DataFrame.hist, 50 bins each,
# on a 20x15 inch canvas
df.hist(figsize=(20, 15), bins=50)
plt.show()

In [None]:
# Keep only the six columns of interest and coerce every one of them to a
# numeric dtype; entries that cannot be parsed turn into NaN (errors='coerce')
data_filtered = df[['SO', 'TERKIRIM', 'Harga Komoditas Bijih Besi',
                    'Indeks Produksi Dalam Negeri', 'Data Inflasi', 'Kurs']].apply(
    pd.to_numeric, errors='coerce'
)

# One row per variable: describe() output transposed, with 'count' overwritten
# by the non-null tally from DataFrame.count(), and the statistic columns
# pinned to an explicit order
descriptive_stats = (
    data_filtered.describe().T
    .assign(count=data_filtered.count())
    .loc[:, ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']]
)

# Plain-text version of the table
print(descriptive_stats)

# Same table rendered as an annotated, colour-shaded grid (no colour bar)
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    descriptive_stats,
    ax=ax,
    annot=True, fmt=".2f",
    cmap="Reds", cbar=False,
    linewidths=0.5, linecolor='gray',
    xticklabels=True, yticklabels=True,
)

# Statistic names go along the top edge, rotated so they do not overlap
ax.xaxis.tick_top()
ax.set_title('Statistik Deskriptif Data', fontsize=16, fontweight='bold', pad=20)
plt.xticks(rotation=45, ha='left')
fig.tight_layout()

# Render the figure
plt.show()

In [None]:
# Per-column NaN tally and its share of the total number of rows, in percent
missing_values = df.isna().sum()
missing_percentage = missing_values / len(df) * 100

# Assemble both measures side by side; the original column labels (the index
# of both Series) are moved into a regular column called 'Column'
missing_df = (
    pd.DataFrame({'Number_of_NaN': missing_values, 'Percent(%)': missing_percentage})
    .rename_axis('Column')
    .reset_index()
)

# Show the summary table
print(missing_df)



In [None]:
# Select only the float64 / int64 columns for the correlation analysis
numeric_df = df.select_dtypes(include=['float64', 'int64'])

# Pairwise correlation between the selected columns (DataFrame.corr defaults)
correlation_matrix = numeric_df.corr()

# Correlation coefficients lie in [-1, 1]. The colour scale spans that full
# range with a diverging palette centred on 0, so positive and negative
# relationships are told apart; a lower bound of 0.1 would paint every weak or
# negative correlation in the same colour.
plt.figure(figsize=(14, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0,
            linewidths=0.5, linecolor='gray')
plt.title('Correlation Matrix', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
