In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.manifold import Isomap, LocallyLinearEmbedding, SpectralEmbedding, TSNE

In [4]:
# Read the iris CSV into the working frame `df`.
df = pd.read_csv(filepath_or_buffer="datasets/iris.csv")

In [19]:
# Collect, in column order, the names of every column of `df` whose dtype
# is exactly float64 or int64.
conts = [
    name
    for name in df.columns
    if str(df[name].dtype) in ("float64", "int64")
]

print(conts)

['sepal_length', 'sepal_width', 'petal_length', 'petal_width']


In [3]:
# Read the mpg CSV, taking its first column as the row index, and preview
# the first five rows.
df = pd.read_csv(filepath_or_buffer="datasets/mpg.csv", index_col=0)
df.head(n=5)

Unnamed: 0,transmission,cylinder,car_size,year,mpg_city,mpg_highway,displacement
0,auto,few,small,1985,17.0,17.0,2.296
1,auto,few,small,1985,17.0,17.0,2.296
2,lock-up,medium,small,1985,21.0,27.0,2.4436
3,lock-up,medium,small,1985,18.0,24.0,2.6896
4,manual,medium,small,1985,18.0,23.0,2.6896


In [6]:
# Draw a 1000-row random subsample of `df`.
# A fixed seed makes the draw repeatable, so re-running the notebook
# selects the same rows instead of a new subset each time.
df_new = df.sample(n=1000, random_state=42)
len(df_new)

1000

#### Spectral Embedding

In [91]:
# Spectral embedding of the four iris measurements into two dimensions;
# the coordinates are appended to the full table and written to CSV.
feature_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df_org = pd.read_csv("datasets/iris.csv")
# Take the features from the frame already in memory instead of parsing
# the same CSV a second time.
df = df_org[feature_cols]
# Fixed seed so the solver initialisation is repeatable across runs.
embedding = SpectralEmbedding(n_components=2, random_state=42)
df_trans = embedding.fit_transform(df)
# NOTE(review): the output recorded for this cell shows Emb_dim2 at ~1e-16
# for the first rows - check that the embedding is not degenerate with the
# default neighbourhood settings.
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/iris_spectrale.csv")
df_org.head()



Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Emb_dim1,Emb_dim2
0,5.1,3.5,1.4,0.2,setosa,0.162884,3.532708e-16
1,4.9,3.0,1.4,0.2,setosa,0.135737,2.042441e-15
2,4.7,3.2,1.3,0.2,setosa,0.144786,2.331468e-15
3,4.6,3.1,1.5,0.2,setosa,0.140261,2.185476e-15
4,5.0,3.6,1.4,0.2,setosa,0.162884,4.710277e-16


#### Isomap

In [92]:
# Isomap: map the four iris measurements to two dimensions, attach the
# coordinates to the full table and write the result to CSV.
df_org = pd.read_csv("datasets/iris.csv")
df = pd.read_csv(
    "datasets/iris.csv",
    usecols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)
embedding = Isomap(n_components=2)
df_trans = embedding.fit_transform(df)
# Column 0 and column 1 of the transformed array become the two new columns.
for dim_idx, dim_name in enumerate(("Emb_dim1", "Emb_dim2")):
    df_org[dim_name] = df_trans[:, dim_idx]
df_org.to_csv("embedded_datasets/iris_isomap.csv")
df_org.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Emb_dim1,Emb_dim2
0,5.1,3.5,1.4,0.2,setosa,-0.118155,-0.381038
1,4.9,3.0,1.4,0.2,setosa,-0.113729,-0.323243
2,4.7,3.2,1.3,0.2,setosa,-0.113922,-0.325955
3,4.6,3.1,1.5,0.2,setosa,-0.113904,-0.325207
4,5.0,3.6,1.4,0.2,setosa,-0.117613,-0.372992


#### Locally Linear Embedding

In [101]:
# Locally linear embedding of the four iris measurements into two
# dimensions; the coordinates are stored next to the original rows.
# NOTE(review): the output recorded for this cell shows 0.0 in both
# embedding columns for the first rows - confirm the result is not
# degenerate with the default estimator settings.
df_org = pd.read_csv("datasets/iris.csv")
df = pd.read_csv(
    "datasets/iris.csv",
    usecols=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)
embedding = LocallyLinearEmbedding(n_components=2)
df_trans = embedding.fit_transform(df)
df_org = df_org.assign(Emb_dim1=df_trans[:, 0], Emb_dim2=df_trans[:, 1])
df_org.to_csv("embedded_datasets/iris_lle.csv")
df_org.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Emb_dim1,Emb_dim2
0,5.1,3.5,1.4,0.2,setosa,0.0,0.0
1,4.9,3.0,1.4,0.2,setosa,0.0,0.0
2,4.7,3.2,1.3,0.2,setosa,0.0,0.0
3,4.6,3.1,1.5,0.2,setosa,0.0,0.0
4,5.0,3.6,1.4,0.2,setosa,0.0,0.0


#### t-SNE

In [123]:
# t-SNE embedding of the four iris measurements into two dimensions;
# the coordinates are appended to the full table and written to CSV.
feature_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df_org = pd.read_csv("datasets/iris.csv")
# Take the features from the frame already in memory instead of parsing
# the same CSV a second time.
df = df_org[feature_cols]
# t-SNE is stochastic: without a seed every run writes different
# coordinates to the CSV, so the saved file cannot be reproduced.
embedding = TSNE(n_components=2, random_state=42)
df_trans = embedding.fit_transform(df)
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/iris_tsne.csv")
df_org.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Emb_dim1,Emb_dim2
0,5.1,3.5,1.4,0.2,setosa,12.528808,-22.938295
1,4.9,3.0,1.4,0.2,setosa,12.905942,-25.360151
2,4.7,3.2,1.3,0.2,setosa,12.229882,-25.706732
3,4.6,3.1,1.5,0.2,setosa,12.004704,-25.912086
4,5.0,3.6,1.4,0.2,setosa,12.104832,-22.881603


In [20]:
# Build the cleaned mpg table from the raw CSV: select and rename columns,
# coarsen the categorical values, drop unwanted categories and incomplete
# rows, and keep the result in `df` / `df_save`.
#
# NOTE(review): this cell expects the raw column names ('trans', 'cyl', ...)
# while another cell writes the cleaned table back to the same
# "datasets/mpg.csv" path. Confirm which version of the file is on disk
# before re-running.
raw_columns = ['trans', 'cyl', 'T', 'class', 'year', 'cty', 'hwy', 'displ']
df = pd.read_csv("datasets/mpg.csv")
# Explicit selection raises KeyError when a raw column is missing, instead
# of silently creating an all-NaN column that dropna() would later turn
# into an empty table.
df = df[raw_columns].rename(columns={
    'cty': 'mpg_city', 'hwy': 'mpg_highway', 'trans': 'transmission',
    'T': 'turbo', 'cyl': 'cylinder', 'class': 'car_size', 'displ': 'displacement',
})

# Assign the replaced Series back to the frame. Calling
# `df.car_size.replace(..., inplace=True)` operates on a temporary column
# object and does not update `df` under pandas Copy-on-Write.
df["car_size"] = df["car_size"].replace({
    'pickup': 'large', 'suv': 'midsize', 'station wagon': 'midsize',
    'compact': 'small', 'passenger van': 'large', 'cargo van': 'large',
    'two seater': 'small', 'large car': 'large', 'midsize car': 'midsize',
    'compact car': 'small',
})

df["cylinder"] = df["cylinder"].replace({
    2: 'few', 3: 'few', 4: 'few',
    5: 'medium', 6: 'medium', 7: 'medium', 8: 'medium',
    10: 'many', 12: 'many', 16: 'many',
})

# Collapse transmission variants onto their family name, one pattern at a
# time and in the original order.
for pattern, label in (('auto.*', 'auto'), ('lock-up.*', 'lock-up'), ('manual.*', 'manual')):
    df = df.replace(to_replace={'transmission': {pattern: label}}, regex=True)

# Remove the categories that are not kept, then every row with a missing
# value. 'turbo' is still present here, so rows without it are dropped too.
keep_rows = ~df["transmission"].isin(['semi-auto', 'creeper(C5)']) & (df["car_size"] != 'spv')
df = df[keep_rows].dropna(axis=0).reset_index(drop=True)

# 'turbo' is only needed for the dropna() above; the remaining categorical
# columns are stored as strings.
df = df.drop(columns='turbo')
df = df.astype({col: 'str' for col in ['transmission', 'cylinder', 'car_size']})

df_save = df

In [22]:
# Write the cleaned mpg table to disk.
# NOTE(review): this is the same path the cleaning cell reads its input
# from - confirm that overwriting that file is intended.
df_save.to_csv(path_or_buf="datasets/mpg.csv")

In [127]:
# t-SNE embedding of the numeric mpg columns into two dimensions.
# Work on a copy: `df_org = df_save` only aliases the cleaned table, so the
# Emb_dim columns added below would also appear in `df_save`.
df_org = df_save.copy()
df = df_org[['year', 'mpg_city', 'mpg_highway', 'displacement']]
# t-SNE is stochastic; a fixed seed makes the saved coordinates repeatable.
embedding = TSNE(n_components=2, random_state=42)
df_trans = embedding.fit_transform(df)
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/mpg_tsne.csv")
df_org.head()

Unnamed: 0,transmission,cylinder,car_size,year,mpg_city,mpg_highway,displacement,Emb_dim1,Emb_dim2
0,auto,few,small,1985,17.0,17.0,2.296,-79.760368,51.356476
1,auto,few,small,1985,17.0,17.0,2.296,-79.760368,51.356476
2,lock-up,medium,small,1985,21.0,27.0,2.4436,77.266411,-46.673534
3,lock-up,medium,small,1985,18.0,24.0,2.6896,-12.142454,16.302458
4,manual,medium,small,1985,18.0,23.0,2.6896,-29.759903,17.950548


In [126]:
# Spectral embedding of the numeric mpg columns into two dimensions.
# Work on a copy: `df_org = df_save` only aliases the cleaned table, so the
# Emb_dim columns added below would also appear in `df_save`.
df_org = df_save.copy()
df = df_org[['year', 'mpg_city', 'mpg_highway', 'displacement']]
# Fixed seed so the solver initialisation is repeatable across runs.
embedding = SpectralEmbedding(n_components=2, random_state=42)
df_trans = embedding.fit_transform(df)
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/mpg_spectrale.csv")
df_org.head()

Unnamed: 0,transmission,cylinder,car_size,year,mpg_city,mpg_highway,displacement,Emb_dim1,Emb_dim2
0,auto,few,small,1985,17.0,17.0,2.296,-0.316966,0.260845
1,auto,few,small,1985,17.0,17.0,2.296,-0.316966,0.260845
2,lock-up,medium,small,1985,21.0,27.0,2.4436,-0.3627,-0.294748
3,lock-up,medium,small,1985,18.0,24.0,2.6896,-0.420926,-0.038883
4,manual,medium,small,1985,18.0,23.0,2.6896,-0.426853,0.015979


In [None]:
# Isomap embedding of the numeric mpg columns into two dimensions.
# Work on a copy: `df_org = df_save` only aliases the cleaned table, so the
# Emb_dim columns added below would also appear in `df_save`.
df_org = df_save.copy()
df = df_org[['year', 'mpg_city', 'mpg_highway', 'displacement']]
embedding = Isomap(n_components=2)
df_trans = embedding.fit_transform(df)
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/mpg_isomap.csv")
df_org.head()

In [None]:
# Locally linear embedding of the numeric mpg columns into two dimensions.
# Work on a copy: `df_org = df_save` only aliases the cleaned table, so the
# Emb_dim columns added below would also appear in `df_save`.
df_org = df_save.copy()
df = df_org[['year', 'mpg_city', 'mpg_highway', 'displacement']]
# The estimator name was garbled into an undefined identifier, which raised
# NameError; the class imported at the top of the notebook is
# LocallyLinearEmbedding. The seed keeps the result repeatable when the
# solver uses random initialisation.
embedding = LocallyLinearEmbedding(n_components=2, random_state=42)
df_trans = embedding.fit_transform(df)
df_org["Emb_dim1"] = df_trans[:, 0]
df_org["Emb_dim2"] = df_trans[:, 1]
df_org.to_csv("embedded_datasets/mpg_lle.csv")
df_org.head()