# Imports

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

## Load dataset

In [None]:
import pandas as pd
# Read the pivoted table from a CSV file; the path is relative, so it is
# resolved against the current working directory.
df = pd.read_csv('JO_pivoted.csv')
# Print the column names so the feature names used later can be checked.
print(df.columns)
# Bare expression: as the last statement of a notebook cell it displays the frame.
df

### Let's select some features for dimensionality reduction
We will also run k-means clustering and use the resulting cluster labels to color the points in the plots

In [None]:

# Columns of `df` used as input features for clustering and for the
# dimensionality reduction below.
crop_features = [
    'barley',
    'fallow land',
    'oats',
    'spring barley',
    'spring wheat',
    'total arable land',
    'winter barley',
    'winter rape',
    'winter turnip rape',
    'winter wheat',
]
X_crop = df[crop_features]

# Fit k-means with 4 clusters and store each row's label on `df`, so the
# plots can color points by cluster.
# NOTE(review): the features are passed in unscaled; if 'total arable land'
# is much larger than the other columns it will presumably dominate the
# distances -- confirm whether standardising first is wanted.
kmeans = KMeans(n_clusters=4, random_state=0, n_init=10)
kmeans.fit(X_crop)
df['crop_cluster'] = kmeans.labels_

# PCA: project the selected features onto two principal components and
# attach each row's k-means label under the "cluster" column.
pca = PCA(n_components=2)
crop_pca = pca.fit_transform(X_crop)
crop_pca_df = pd.DataFrame(
    crop_pca,
    columns=["PC1", "PC2"],
    index=X_crop.index,  # keep the rows aligned with the original frame
).assign(cluster=df.loc[X_crop.index, 'crop_cluster'])

# t-SNE function
def run_tsne(perplexity=30, random_state=0):
    """Embed the module-level ``X_crop`` features in 2-D with t-SNE.

    Args:
        perplexity: Passed through to ``TSNE(perplexity=...)``.
        random_state: Passed through to ``TSNE(random_state=...)``.

    Returns:
        A DataFrame indexed like ``X_crop`` with columns ``TSNE1`` and
        ``TSNE2`` (the embedding) and ``cluster`` (the ``crop_cluster``
        labels read from the module-level ``df``).
    """
    reducer = TSNE(
        n_components=2,
        learning_rate='auto',
        init='random',
        perplexity=perplexity,
        random_state=random_state,
    )
    coords = reducer.fit_transform(X_crop)
    return pd.DataFrame(
        coords,
        columns=["TSNE1", "TSNE2"],
        index=X_crop.index,
    ).assign(cluster=df.loc[X_crop.index, 'crop_cluster'])

# Plot PCA: scatter of the two components, one color per k-means cluster.
plt.figure(figsize=(8, 6))
pca_ax = sns.scatterplot(
    data=crop_pca_df,
    x="PC1",
    y="PC2",
    hue="cluster",
    palette="tab10",
)
# seaborn returns the Axes it drew on; set the title on it directly.
pca_ax.set_title('PCA of Crop Features (colored by cluster)')
plt.show()

# Plot t-SNE for different perplexities: one panel per value, side by side,
# each colored by the same k-means cluster labels.
perplexities = [5, 10, 25, 50, 100]
fig, axes = plt.subplots(1, len(perplexities), figsize=(20, 5))
for ax, p in zip(axes, perplexities):
    tsne_df = run_tsne(perplexity=p)
    sns.scatterplot(
        data=tsne_df,
        x="TSNE1",
        y="TSNE2",
        hue="cluster",
        palette="tab10",
        legend=False,  # no per-panel legend
        ax=ax,
    )
    ax.set_title(f't-SNE (perplexity={p})')
fig.tight_layout()
plt.show()



## Let's reduce the dimensions with both methods (PCA and t-SNE)

In [None]:
# Project the selected features onto two principal components and attach
# each row's k-means label under the "cluster" column.
pca = PCA(n_components=2)
crop_pca = pca.fit_transform(X_crop)
crop_pca_df = pd.DataFrame(
    crop_pca,
    columns=["PC1", "PC2"],
    index=X_crop.index,  # keep the rows aligned with the original frame
).assign(cluster=df.loc[X_crop.index, 'crop_cluster'])

def run_tsne(perplexity=30, random_state=0):
    """Embed the module-level ``X_crop`` features in 2-D with t-SNE.

    Args:
        perplexity: Passed through to ``TSNE(perplexity=...)``.
        random_state: Passed through to ``TSNE(random_state=...)``.

    Returns:
        A DataFrame indexed like ``X_crop`` with columns ``TSNE1`` and
        ``TSNE2`` (the embedding) and ``cluster`` (the ``crop_cluster``
        labels read from the module-level ``df``).
    """
    reducer = TSNE(
        n_components=2,
        learning_rate='auto',
        init='random',
        perplexity=perplexity,
        random_state=random_state,
    )
    coords = reducer.fit_transform(X_crop)
    return pd.DataFrame(
        coords,
        columns=["TSNE1", "TSNE2"],
        index=X_crop.index,
    ).assign(cluster=df.loc[X_crop.index, 'crop_cluster'])

## Lets 