In [1]:
import torch
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import fetch_olivetti_faces
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
from torchvision.transforms.functional import InterpolationMode 
def plot_image(image, label):
    """Display one grayscale image with its label as the title.

    Args:
        image: Array-like object exposing ``squeeze()``; singleton dimensions
            are dropped before drawing.
        label: Value interpolated into the figure title.
    """
    pixels = image.squeeze()
    title_text = f"Label: {label}"

    plt.imshow(pixels, cmap='gray')
    plt.title(title_text)
    plt.axis('off')  # hide ticks and frame around the picture
    plt.show()
    

def reshape_olivetti_faces(images, new_shape=(28, 28)):
    """Resize a stack of 2-D images to ``new_shape``.

    The previous implementation called ``np.reshape`` with a hard-coded
    ``(28, 28)``, which only re-labels the axes of an array with the same
    number of elements: it raised for 64x64 inputs and ignored ``new_shape``.
    Images are now resampled with linear interpolation.

    Args:
        images: Array of shape ``(num_samples, height, width)``.
        new_shape: Target ``(height, width)`` of every output image.

    Returns:
        Float array of shape ``(num_samples, *new_shape)``.
    """
    images = np.asarray(images)
    num_samples, original_height, original_width = images.shape
    new_height, new_width = new_shape
    reshaped_images = np.empty((num_samples, new_height, new_width))

    # Nothing to resample: return a float copy so values stay bit-exact.
    if (original_height, original_width) == (new_height, new_width):
        reshaped_images[...] = images
        return reshaped_images

    # zoom() rounds input_size * factor, so these factors give exactly new_shape.
    factors = (new_height / original_height, new_width / original_width)
    for i in range(num_samples):
        reshaped_images[i] = zoom(images[i], factors, order=1)

    return reshaped_images

def get_olivetti_faces_dataloader(batch_size=64, drop_last=True, shuffle=True):
    """Build a DataLoader over the Olivetti faces resized to 28x28.

    The faces are fetched with scikit-learn, resized one by one with a
    nearest-neighbour torchvision ``Resize`` and wrapped in a ``TensorDataset``.
    The first resized face is plotted as a visual sanity check.

    Args:
        batch_size: Number of samples per batch.
        drop_last: Forwarded to ``DataLoader``.
        shuffle: Forwarded to ``DataLoader``.

    Returns:
        ``(dataloader, data_shape)`` where ``data_shape`` is the shape of one
        sample (``data.shape[1:]``).
    """
    # Imported here because this cell's import list provides neither name;
    # without them the function raised NameError on a fresh kernel.
    from PIL import Image
    from torchvision import transforms

    # Fetch once (the earlier version fetched twice and discarded the first result).
    olivetti_faces = fetch_olivetti_faces(shuffle=True, random_state=42)
    images_array = olivetti_faces.images
    labels = olivetti_faces.target

    # Target size of every face
    target_size = (28, 28)
    resize_transform = transforms.Resize(target_size, interpolation=InterpolationMode.NEAREST)

    resized_images = []
    for image_data in images_array:
        # Round-trip through an 8-bit PIL image; assumes pixels are in [0, 1].
        original_image = Image.fromarray(np.uint8(image_data * 255))
        resized_image = resize_transform(original_image)
        # Back to a NumPy array rescaled to [0, 1]
        resized_images.append(np.array(resized_image) / 255.0)

    resized_images_array = np.stack(resized_images)

    data = torch.Tensor(resized_images_array)
    plot_image(data[0], labels[0])
    data_shape = data.shape[1:]
    # NOTE(review): torch.Tensor() yields float32 labels; kept for compatibility
    # with existing consumers - confirm whether integer class ids are wanted.
    labels = torch.Tensor(labels)

    final_dataset = TensorDataset(data, labels)
    dataloader = DataLoader(final_dataset, batch_size=batch_size, drop_last=drop_last, shuffle=shuffle)

    return dataloader, data_shape

In [2]:
import matplotlib.pyplot as plt
def plot_faces(faces, labels, n_cols=100):
    """Draw 28x28 faces on a gap-free grid.

    Args:
        faces: Array-like reshaped to ``(-1, 28, 28)``.
        labels: One label per face. Labels are not drawn; a face is only shown
            if it has a matching label (faces and labels are zipped together).
        n_cols: Maximum number of columns; clamped to the number of faces so a
            small selection does not create a mostly empty 100-column grid.
    """
    faces = faces.reshape(-1, 28, 28)
    n_faces = len(faces)
    if n_faces == 0:
        # (0 - 1) // n_cols + 1 == 0 rows, which plt.subplots rejects.
        return

    n_cols = max(1, min(n_cols, n_faces))
    n_rows = (n_faces - 1) // n_cols + 1

    # Size the figure from the grid instead of a fixed 500x500-inch canvas.
    cell_inches = 0.6
    # squeeze=False always returns a 2-D array of Axes, so .ravel() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(n_cols * cell_inches, n_rows * cell_inches),
        squeeze=False,
    )
    plt.subplots_adjust(wspace=0, hspace=0)  # Set spacing to 0

    flat_axes = axes.ravel()
    for ax in flat_axes:
        ax.axis("off")

    for ax, (face, label) in zip(flat_axes, zip(faces, labels)):
        ax.imshow(face, cmap="gray")

    # Drop the unused cells of the last row.
    for ax in flat_axes[n_faces:]:
        ax.remove()

    plt.show()
    
# NOTE(review): `autoencoder` is not defined anywhere in this cell; the recorded
# output of this cell is a NameError, so it must be created by an earlier cell.
data, similarity, similarity_to_labels, labels = autoencoder.get_similarity_data()
# Positions whose entry in similarity_to_labels equals 7
valid_indices = np.where(similarity_to_labels == 7)[0]
# Sentinels for the running minimum / maximum; they are printed unchanged when
# valid_indices is empty.
min_s = 100000000
max_s = -10000000

# Subset of valid_indices whose similarity exceeds the 0.05 threshold
valid_indices_2 = list()
for index in valid_indices:
    # Similarity of this sample in the column selected by similarity_to_labels[index]
    cur_similarity = similarity[index][similarity_to_labels[index]]
    if(min_s > cur_similarity):
        min_s = cur_similarity
    if(max_s < cur_similarity):
        max_s = cur_similarity
    if( cur_similarity > 0.05 ):
        valid_indices_2.append(index)
print(min_s)
print(max_s)
print(valid_indices_2)
# Plot only the samples that passed the threshold
plot_faces(data[valid_indices_2],labels[valid_indices_2])

NameError: name 'autoencoder' is not defined

In [None]:
from PIL import Image, ImageDraw
import numpy as np

# Define the size of each individual image
image_size = (28, 28)

# Create a blank canvas for the collage (10 x 10 tiles)
collage_size = (image_size[0] * 10, image_size[1] * 10)
collage = Image.new('RGB', collage_size)

# Create a drawing context used to draw the coloured tiles onto the canvas
draw = ImageDraw.Draw(collage)

# Top-left corner of the tile currently being drawn
x, y = 0, 0

# Generate random colored squares as image placeholders
# (outer loop walks rows, inner loop walks columns; colours are unseeded)
for _ in range(10):
    for _ in range(10):
        color = tuple(np.random.randint(0, 256, 3))  # Generate a random RGB color
        # NOTE(review): the far corner is x + width / y + height, so each tile
        # reaches one pixel into its right/lower neighbour, which is drawn later
        # and paints over it - confirm against Pillow's rectangle bounds rule.
        draw.rectangle([x, y, x + image_size[0], y + image_size[1]], fill=color)
        x += image_size[0]
    # Start the next row at the left edge
    x = 0
    y += image_size[1]

# Save the collage to a file or display it
collage.save('collage.png')  # Change the filename and format as needed
collage.show()  # Display the collage


In [None]:
import matplotlib.pyplot as plt
from PIL import Image

# Read both collage PNGs from the working directory
image1 = Image.open('collage_digits_8_8.png')
image2 = Image.open('collage_letters_8_8.png')

# One row with two panels on a 10x5 inch canvas
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

# Draw each collage in its own panel and hide ticks and axis labels
for ax, picture in zip(axes, (image1, image2)):
    ax.imshow(picture)
    ax.axis('off')

# Tighten the spacing between the panels, then render
plt.tight_layout()
plt.show()


In [None]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.cluster import KMeans
from sklearn import metrics

categories = None

newsgroups_train = fetch_20newsgroups(subset='train')
labels = newsgroups_train.target

# NOTE(review): true_k is not used below; KMeans is run with 20 clusters.
true_k = 12

# Seed for every stochastic step so the printed metrics are reproducible
# across Restart & Run All.
SEED = 42

#vectorize

vectorizer = TfidfVectorizer(max_df=0.5,
                             min_df=2,
                             stop_words='english')


X = vectorizer.fit_transform(newsgroups_train.data)

#clustering
# n_init=1 means a single initialisation, so the result depends entirely on
# the seed; random_state pins it.
km = KMeans(n_clusters=20, init='k-means++', max_iter=100, n_init=1,
            random_state=SEED)

km.fit(X)

# Feature indices of every centroid, sorted by descending weight
order_centroids = km.cluster_centers_.argsort()[:, ::-1]


#Performance
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
# The silhouette score is estimated on a random subsample of 1000 points;
# seed the sampling as well.
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, km.labels_, sample_size=1000,
                                 random_state=SEED))

In [None]:
from sklearn.datasets import fetch_openml

# Fetch version 2 of the "usps" dataset from OpenML
usps = fetch_openml(name="usps", version=2)

# Bind features and targets to the conventional X / y names
X = usps.data
y = usps.target

In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from sklearn.datasets import fetch_openml

class USPSDataset(Dataset):
    """USPS digits from OpenML exposed as a PyTorch ``Dataset``.

    Args:
        split: ``'train'`` keeps the first 7291 samples, ``'test'`` keeps the
            remainder; any other value keeps the whole dataset.
        transform: Optional callable applied to each ``(16, 16)`` float32 image.
    """

    def __init__(self, split='train', transform=None):
        self.split = split
        self.transform = transform

        # as_frame=False makes fetch_openml return NumPy arrays. With the
        # pandas default, ``self.data[idx]`` selects a *column* by label and
        # the sliced test labels keep an index that starts at 7291, so
        # positional access in __getitem__ failed.
        usps = fetch_openml(name="usps", version=2, as_frame=False)
        self.data, self.labels = usps.data, usps.target.astype(int)

        if self.split == 'train':
            self.data = self.data[:7291]  # Use the first 7291 samples for training
            self.labels = self.labels[:7291]
        elif self.split == 'test':
            self.data = self.data[7291:]  # Use the remaining samples for testing
            self.labels = self.labels[7291:]

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The flat 256-value row is reshaped to ``(16, 16)`` float32 before the
        optional transform is applied.
        """
        image = self.data[idx].reshape(16, 16).astype('float32')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# ToTensor is the only preprocessing step applied to each image
transform = transforms.Compose([transforms.ToTensor()])

# Build the training and the test dataset with the same transform
usps_train_dataset, usps_test_dataset = (
    USPSDataset(split=split_name, transform=transform)
    for split_name in ('train', 'test')
)

# Peek at the first training sample
sample_image, sample_label = usps_train_dataset[0]
print("Sample Image Shape:", sample_image.shape)
print("Sample Label:", sample_label)


In [None]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import numpy as np
from PIL import Image
from Visualization import Visualization
visualization = Visualization()
def get_usps_np():
    """Load USPS from OpenML and return it resized to 28x28.

    Plots the first original and the first resized digit as a sanity check.

    Returns:
        ``(data, labels)`` where ``data`` has shape ``(n_samples, 1, 28, 28)``
        with values in [0, 1] and ``labels`` are label-encoded integers.
    """
    # Load the USPS dataset using Scikit-Learn
    usps = fetch_openml(name="usps", version=2)

    # Extract the data and labels as NumPy arrays
    usps_data = usps.data.astype(float).to_numpy()
    usps_labels = usps.target.astype(int).to_numpy()
    data = np.reshape(usps_data, (-1, 1, 16, 16))
    visualization.plot_image(data[0], usps_labels[0])

    # Pillow's 8-bit 'L' mode needs uint8 data. The earlier code handed it a
    # float64 array, whose raw bytes were then read as pixels. Min-max scale
    # the whole dataset to 0..255 first (raw values are presumably in [-1, 1]
    # - TODO confirm; the scaling does not depend on it).
    pixel_min, pixel_max = usps_data.min(), usps_data.max()
    pixel_span = (pixel_max - pixel_min) or 1.0
    usps_data_8bit = np.round((usps_data - pixel_min) / pixel_span * 255).astype('uint8')

    # Initialize an empty array for resized images
    usps_data_resized = np.zeros((len(usps_data), 28, 28))

    # Resize each image to 28x28 pixels and store in the new array
    for i in range(len(usps_data)):
        # 2-D uint8 input: the image mode is inferred as 8-bit grayscale
        image = Image.fromarray(usps_data_8bit[i].reshape(16, 16))

        # Resize the image to 28x28 pixels using bilinear interpolation
        image_resized = image.resize((28, 28), Image.BILINEAR)

        # Convert the resized image back to a NumPy array in [0, 1]
        usps_data_resized[i] = np.array(image_resized, dtype='float32') / 255.0

    # Print the shape of the resized data
    print("Resized Data Shape:", usps_data_resized.shape)

    data = np.reshape(usps_data_resized, (-1, 1, 28, 28))
    print(data[0].shape)
    labels = LabelEncoder().fit_transform(usps_labels)

    visualization.plot_image(data[0], labels[0])
    return data, labels
data, labels = get_usps_np()

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

# Casting the raw floats straight to uint8 destroys the image unless they are
# already 0..255 intensities (they are presumably in [-1, 1] - TODO confirm).
# Min-max scale the dataset to 0..255 before building 8-bit images.
pixel_min, pixel_max = usps_data.min(), usps_data.max()
pixel_span = (pixel_max - pixel_min) or 1.0
usps_data_8bit = np.round((usps_data - pixel_min) / pixel_span * 255).astype('uint8')

# Initialize an empty array for resized images
usps_data_resized = np.zeros((len(usps_data), 28, 28))

# Resize each image to 28x28 pixels using the LANCZOS filter
for i in range(len(usps_data)):
    # Reshape the image to 16x16 pixels
    image = usps_data_8bit[i].reshape(16, 16)

    # 2-D uint8 input: the image mode is inferred as 8-bit grayscale
    image_pil = Image.fromarray(image)

    # Resize the image to 28x28 pixels using the LANCZOS filter
    image_resized = image_pil.resize((28, 28), Image.LANCZOS)

    # Convert the resized image back to a NumPy array in [0, 1]
    usps_data_resized[i] = np.array(image_resized, dtype='float32') / 255.0

# Plot the resized image (e.g., the first image)
plt.figure()
plt.imshow(usps_data_resized[0], cmap='gray')  # cmap='gray' for grayscale
plt.title(f'USPS Label: {usps_labels[0]}')
plt.show()


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from PIL import Image
import matplotlib.pyplot as plt

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

# Choose an index to plot (e.g., the first image)
index_to_plot = 0

# Casting the raw floats straight to uint8 destroys the image unless they are
# already 0..255 intensities (they are presumably in [-1, 1] - TODO confirm).
# Min-max scale with the dataset-wide range before building the 8-bit image.
pixel_min, pixel_max = usps_data.min(), usps_data.max()
pixel_span = (pixel_max - pixel_min) or 1.0
chosen_pixels = np.round(
    (usps_data[index_to_plot].reshape(16, 16) - pixel_min) / pixel_span * 255
).astype('uint8')

# Resize the chosen image to 28x28 pixels with BILINEAR interpolation
chosen_image = Image.fromarray(chosen_pixels)
chosen_image_resized = chosen_image.resize((28, 28), Image.BILINEAR)

# Convert the resized image back to a NumPy array in [0, 1]
resized_image = np.array(chosen_image_resized, dtype='float32') / 255.0

# Plot the resized image. resized_image is already the 2-D (28, 28) picture;
# indexing it with [0] passed a single 1-D row to imshow.
plt.figure()
plt.imshow(resized_image, cmap='gray')  # cmap='gray' for grayscale
plt.title(f'USPS Label: {usps_labels[index_to_plot]}')
plt.show()


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml




In [None]:
from mnist import MNIST
# NOTE(review): SHUFFLE and IMG_SIZE are defined here but not used in this
# cell; the reshape below hard-codes 28 instead of IMG_SIZE.
SHUFFLE = True 
IMG_SIZE = 28

folder_path = './Datasets/'
# Options: balanced, byclass, bymerge, digits, letters, mnist
mndata = MNIST(folder_path + 'EMNIST')

# Select the 'balanced' EMNIST variant, then read both splits
mndata.select_emnist('balanced') 
data, labels = mndata.load_training()
data_ts, labels_ts = mndata.load_testing()

# Merge the training and testing splits into one dataset
data = np.vstack((data, data_ts))
labels = np.hstack((labels, labels_ts))
# MinMaxScaler scales every column (pixel position) independently.
# NOTE(review): np, MinMaxScaler and LabelEncoder are not imported in this
# cell; they must come from earlier cells.
data = MinMaxScaler().fit_transform(data).astype(np.float32)
print(data[0].shape)
# Reshape each flat row into a single-channel 28x28 image
data = np.reshape(data, (-1, 1, 28, 28))
print(data[0].shape)
labels = np.array(labels)
# Re-encode the labels as consecutive integers starting at 0
labels = LabelEncoder().fit_transform(labels)



In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

# Reshape the flat rows into 16x16 images
usps_data_reshaped = usps_data.reshape(-1, 16, 16)  # (n_samples, 256) -> (n_samples, 16, 16)

# Upscale the images to 28x28 with nearest-neighbour sampling.
# np.kron(image, np.ones((2, 2))) doubles each side to 32x32, which cannot be
# stored in a 28x28 slot; instead map every target row/column back to its
# source row/column.
source_index = (np.arange(28) * 16) // 28
usps_data_upscaled = usps_data_reshaped[:, source_index[:, None], source_index]

# Print the shapes of the data and labels
print("Reshaped Data Shape:", usps_data_upscaled.shape)
print("Training Labels Shape:", usps_labels.shape)


In [None]:
!pip install opencv-python
import numpy as np
from sklearn.datasets import fetch_openml
import cv2
import matplotlib.pyplot as plt

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

# Casting the raw floats to uint8 destroys the image unless they are already
# 0..255 intensities (they are presumably in [-1, 1] - TODO confirm).
# cv2.resize works on float32 directly, so min-max scale to [0, 1] instead.
pixel_min, pixel_max = usps_data.min(), usps_data.max()
pixel_span = (pixel_max - pixel_min) or 1.0
usps_data_unit = ((usps_data - pixel_min) / pixel_span).astype('float32')

# Initialize an empty array for resized images
usps_data_resized = np.zeros((len(usps_data), 28, 28))

# Resize each image to 28x28 pixels
for i in range(len(usps_data)):
    # Reshape the image to 16x16 pixels
    image = usps_data_unit[i].reshape(16, 16)

    # Resize the image to 28x28 pixels using INTER_LINEAR interpolation
    image_resized = cv2.resize(image, (28, 28), interpolation=cv2.INTER_LINEAR)

    # Store the resized image (already a NumPy array in [0, 1])
    usps_data_resized[i] = image_resized

# Plot the resized image (e.g., the first image). usps_data[0] is a flat
# 256-value row and cannot be drawn by imshow.
plt.figure()
plt.imshow(usps_data_resized[0], cmap='gray')  # cmap='gray' for grayscale
plt.title(f'USPS Label: {usps_labels[0]}')
plt.show()


In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): usps_data is not defined in this cell; it must come from an
# earlier cell. reshape(16, 16) accepts both the flat (256,) row and the
# (1, 16, 16) layout used elsewhere in this notebook.
pixels = np.asarray(usps_data[0], dtype='float32').reshape(16, 16)
print(pixels)

# ndarray.resize() is not an image resize, so the array has to become a PIL
# image first. Min-max scale to 0..255 because 8-bit grayscale needs uint8.
pixel_min, pixel_max = pixels.min(), pixels.max()
pixel_span = (pixel_max - pixel_min) or 1.0
image = Image.fromarray(np.round((pixels - pixel_min) / pixel_span * 255).astype('uint8'))

# Resize the image to 28x28 pixels
image_resized = image.resize((28, 28), Image.LANCZOS)  # You can use other resampling methods as well

# Convert the resized image to a NumPy array
image_array = np.array(image_resized)

# Plot the original and resized images
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.imshow(image, cmap='gray')
plt.title("Original (16x16)")
plt.subplot(1, 2, 2)
plt.imshow(image_array, cmap='gray')
plt.title("Resized (28x28)")
plt.show()

# Optionally, save the resized image to a file
image_resized.save("resized_image.png")


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from PIL import Image, ImageOps
import matplotlib.pyplot as plt

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

usps_data = np.reshape(usps_data, (-1, 1, 16, 16))

# NOTE(review): `visualization` is not defined in this cell; it must come from
# an earlier cell.
visualization.plot_image(usps_data[0], usps_labels[0])

# Calculate the padding (6 pixels on each side)
padding = (28 - 16) // 2

# Fill the 28x28 canvases with the darkest value in the dataset so the border
# renders black with a gray colormap whatever the pixel range is (this equals
# 0 when the data starts at 0).
border_value = usps_data.min()
usps_data_resized = np.full((len(usps_data), 28, 28), border_value)

# Loop through each image and paste it into the centre of its 28x28 canvas.
# (The earlier version referenced the undefined names `initial_array` and
# `image` and never filled usps_data_resized.)
for i in range(len(usps_data)):
    image = usps_data[i, 0]
    usps_data_resized[i, padding:padding + 16, padding:padding + 16] = image

    # Plot the original and padded images (sample every 500 images).
    # A new figure is created per sample because plt.show() finishes the
    # current one.
    if i % 500 == 0:
        fig, axs = plt.subplots(1, 2, figsize=(8, 4))
        axs[0].imshow(image, cmap='gray')
        axs[0].set_title("Original (16x16)")
        axs[1].imshow(usps_data_resized[i], cmap='gray')
        axs[1].set_title("Resized (28x28 with borders)")
        plt.show()

# Print the shape of the resized data
print("Resized Data Shape:", usps_data_resized.shape)


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml

# Load the USPS dataset using Scikit-Learn
usps = fetch_openml(name="usps", version=2)

# Convert DataFrame to NumPy arrays
usps_data = usps.data.astype(float).to_numpy()
usps_labels = usps.target.astype(int).to_numpy()

# Initialize an empty array for resized images (28x28) with padding filled with -1
usps_data_padded = np.full((len(usps_data), 28, 28), -1.0)

# Calculate padding size (6 pixels on each side)
padding = (28 - 16) // 2

# Loop through each image and add padding with -1 values to make it 28x28
for i in range(len(usps_data)):
    # Reshape the image to 16x16 pixels
    image = usps_data[i].reshape(16, 16).astype('float32')
    
    # Create a 28x28 canvas filled with -1 values
    padded_image = np.full((28, 28), -1.0)
    
    # Paste the original image into the center of the canvas
    padded_image[padding:padding+16, padding:padding+16] = image # Values are copied unchanged; no rescaling happens here
    
    # Store the padded image in the new array
    usps_data_padded[i] = padded_image

# Print the shape of the padded data
print("Padded Data Shape:", usps_data_padded.shape)

# NOTE(review): `visualization` is not defined in this cell; it must come from
# an earlier cell.
visualization.plot_image(usps_data_padded[0], usps_labels[0])

In [None]:
import matplotlib.pyplot as plt

# NOTE(review): train_loader is not defined in this cell; it must be created by
# an earlier cell (presumably the DVS gesture DataLoader - confirm).

# Load a batch of data and labels from the training DataLoader
batch_iterator = iter(train_loader)
sample_data, sample_labels = next(batch_iterator)

# Take the first sample of the batch, drop singleton dimensions and convert to NumPy
sample_image = sample_data[0].squeeze().numpy()
sample_label = sample_labels[0]

# Map labels to gesture names. The earlier literal ended in a bare `...`,
# which is a SyntaxError inside a dict display; extend the mapping here.
label_to_gesture = {0: 'Gesture 0', 1: 'Gesture 1', 2: 'Gesture 2'}

# Labels without an entry fall back to a generic name instead of raising KeyError
label_id = sample_label.item()
gesture_name = label_to_gesture.get(label_id, f'Gesture {label_id}')

# Display the sample image and label
plt.imshow(sample_image, cmap='gray')
plt.title(f'Gesture: {gesture_name}')
plt.show()


In [None]:
import requests

# Optional dependency: use tqdm for progress bars when it is installed.
try:
    from tqdm import tqdm
except ImportError:
    # The stand-in accepts the same arguments download_list passes
    # (iterable, total, unit) and returns the iterable unchanged.
    tqdm = lambda x, total, unit: x  # If tqdm doesn't exist, replace it with a function that does nothing
    print('**** Could not import tqdm. Please install tqdm for download progressbars! (pip install tqdm) ****')

# Python2 compatibility
# raw_input only exists on Python 2; on Python 3 the lookup raises NameError
# and the built-in input is kept.
try:
    input = raw_input
except NameError:
    pass

# Nested download menu walked by traverse_dict: dataset -> file format -> list
# of URLs. Keys start with "N)" so that sorting them gives the menu order and
# the number the user types selects the N-th key.
# NOTE(review): all URLs are plain http.
download_dict = {
    '1) Kuzushiji-MNIST (10 classes, 28x28, 70k examples)': {
        '1) MNIST data format (ubyte.gz)':
            ['http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz'],
        '2) NumPy data format (.npz)':
            ['http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-labels.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-imgs.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-labels.npz'],
    },
    '2) Kuzushiji-49 (49 classes, 28x28, 270k examples)': {
        '1) NumPy data format (.npz)':
            ['http://codh.rois.ac.jp/kmnist/dataset/k49/k49-train-imgs.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-train-labels.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-test-imgs.npz',
            'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-test-labels.npz'],
    },
    '3) Kuzushiji-Kanji (3832 classes, 64x64, 140k examples)': {
        '1) Folders of images (.tar)':
            ['http://codh.rois.ac.jp/kmnist/dataset/kkanji/kkanji.tar'],
    }

}

# Download a list of files
# Download a list of files
def download_list(url_list):
    """Download every URL in ``url_list`` into the current directory.

    Each file is saved under the last path component of its URL and streamed
    in 1 KB chunks.

    Args:
        url_list: Iterable of URLs.

    Raises:
        requests.HTTPError: If a server answers with an error status. Raised
            before the target file is opened, so an error page is never saved
            as a dataset file.
    """
    for url in url_list:
        path = url.split('/')[-1]
        # Context manager releases the connection even if writing fails;
        # the timeout keeps a stalled server from hanging the cell forever.
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()

            # content-length is optional; int(None) used to crash here.
            content_length = r.headers.get('content-length')
            if content_length is None:
                total_chunks = None
                print('Downloading {} - size unknown'.format(path))
            else:
                total_length = int(content_length)
                total_chunks = int(total_length / 1024) + 1
                print('Downloading {} - {:.1f} MB'.format(path, (total_length / 1024000)))

            with open(path, 'wb') as f:
                for chunk in tqdm(r.iter_content(chunk_size=1024), total=total_chunks, unit="KB"):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
    print('All dataset files downloaded!')

# Ask the user about which path to take down the dict
# Ask the user about which path to take down the dict
def traverse_dict(d):
    """Walk a nested menu dict interactively and download the chosen files.

    The sorted keys of ``d`` are printed and the user picks one by its 1-based
    position. A list value is passed to ``download_list``; a dict value is
    traversed the same way.

    Args:
        d: Dict whose values are either lists of URLs or nested dicts.
    """
    keys = sorted(d.keys())
    if not keys:
        # Nothing to choose from; prompting would loop forever.
        print('No download options available')
        return

    # Re-prompt in a loop (not by recursion) until the selection is valid.
    while True:
        print('Please select a download option:')
        for key in keys:  # Print download options
            print(key)

        userinput = input('> ').strip()

        try:
            selection = int(userinput) - 1
        except ValueError:
            selection = -1

        # Reject out-of-range numbers as well: they used to raise IndexError,
        # and 0 / negative values silently picked entries from the end.
        if 0 <= selection < len(keys):
            break
        print('Your selection was not valid')

    next_level = d[keys[selection]]
    if isinstance(next_level, list):  # If we've hit a list of downloads, download that list
        download_list(next_level)
    else:
        traverse_dict(next_level)     # Otherwise, repeat with the next level

# Start the interactive selection at the top level of the download menu
traverse_dict(download_dict)

In [None]:
# Based on MNIST CNN from Keras' examples: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py (MIT License)

from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy as np

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

def load(f):
    """Return the array stored under 'arr_0' in the .npz file ``f``."""
    return np.load(f)['arr_0']

# Load the data. The model code below needs x_train / y_train / x_test /
# y_test / input_shape, which this cell never defined.
# NOTE(review): directory taken from the KMNIST loading cell of this notebook
# and file names from the download list - confirm the files live there.
x_train = load('./Datasets/KMNIST/kmnist-train-imgs.npz')
x_test = load('./Datasets/KMNIST/kmnist-test-imgs.npz')
y_train = load('./Datasets/KMNIST/kmnist-train-labels.npz')
y_test = load('./Datasets/KMNIST/kmnist-test-labels.npz')

# Add the channel axis where the configured Keras image data format expects it
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Scale pixel values to [0, 1] (assumes 8-bit intensities - TODO confirm)
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('{} train samples, {} test samples'.format(len(x_train), len(x_test)))

# Convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two convolution layers, max pooling, dropout, then a dense classifier head
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# Report loss and accuracy on both splits
train_score = model.evaluate(x_train, y_train, verbose=0)
test_score = model.evaluate(x_test, y_test, verbose=0)
print('Train loss:', train_score[0])
print('Train accuracy:', train_score[1])
print('Test loss:', test_score[0])
print('Test accuracy:', test_score[1])

In [None]:
def load(f):
    """Return the array stored under 'arr_0' in the .npz file ``f``."""
    return np.load(f)['arr_0']

# Load the data
x_train = load('./Datasets/KMNIST/kmnist-train-imgs.npz')
y_train = load('./Datasets/KMNIST/kmnist-train-labels.npz')
# x_test was used below without ever being loaded.
# NOTE(review): file name taken from the download list - confirm it exists.
x_test = load('./Datasets/KMNIST/kmnist-test-imgs.npz')

# Flatten every image into one row. reshape(-1, x_train.shape[-1]) kept only
# the last axis as columns, which turned each image *row* into its own sample.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# NOTE(review): np, MinMaxScaler and visualization are not defined in this
# cell; they must come from earlier cells.
data = MinMaxScaler().fit_transform(x_train).astype(np.float32)
x_test = x_test.astype('float32')
#x_train /= 255
#x_test /= 255
print(x_train[0])
print('{} train samples, {} test samples'.format(len(x_train), len(x_test)))
# Show the first training image with its own label (previously the label came
# from usps_labels, which belongs to a different dataset). The 28x28 size is
# taken from the KMNIST description in the download list.
visualization.plot_image(x_train[0].reshape(28, 28), y_train[0])

In [None]:
import numpy as np

# Small 2x2x2 example array; swap in real data as needed.
three_dim_array = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

# Remember the shape before reshaping
original_shape = three_dim_array.shape

# Collapse all leading axes into one and keep the last axis as the columns,
# i.e. one output row per innermost vector of the 3D array.
last_axis_length = original_shape[-1]
two_dim_array = np.reshape(three_dim_array, (-1, last_axis_length))

# Show the input first ...
print("Original 3D Array:")
print(three_dim_array)
print("Shape of Original 3D Array:", original_shape)

# ... then the flattened result
print("\nReshaped 2D Array:")
print(two_dim_array)
print("Shape of Reshaped 2D Array:", two_dim_array.shape)


In [None]:
def load_data(filepath='./shrec2017_skel-data.pckl'):
    """
    Load the pickled hand gesture data stored at ``filepath``.

    According to the original description, the file holds hand gesture
    sequences (X) and their associated labels (Y), with two labels per
    sequence: one out of 14 gesture classes (e.g. swiping your hand to the
    right) and one out of 28 classes (e.g. the same swipe with the index
    finger pointed, or not pointed).
    NOTE(review): the layout of the unpickled object is not visible here, so
    it is returned unchanged - confirm how callers should unpack it.

    Returns:
        The unpickled object (previously the function returned nothing).
    """
    # Local import: the cell defining this function does not import pickle.
    import pickle

    # SECURITY: pickle.load can execute arbitrary code; only open trusted files.
    with open(filepath, 'rb') as file:
        data = pickle.load(file, encoding='latin1')  # <<---- change to 'latin1' to 'utf8' if the data does not load
    return data

In [None]:
import nltk
import numpy as np
from nltk.corpus import reuters

# Download the Reuters dataset if you haven't already
nltk.download('reuters')

# Define the four categories or classes you want to include
categories = ['earn', 'acq', 'crude', 'trade']

# Collect the ids of all Reuters documents tagged with any of these categories
documents = reuters.fileids(categories=categories)

# Extract the text content of the documents
corpus = [reuters.raw(doc_id) for doc_id in documents]

# Use a text vectorization technique, like TF-IDF or CountVectorizer, to convert the text into numerical features
from sklearn.feature_extraction.text import TfidfVectorizer

# Default TfidfVectorizer settings (no stop-word removal or frequency cut-offs)
tfidf_vectorizer = TfidfVectorizer()
X = tfidf_vectorizer.fit_transform(corpus)

# Convert the sparse TF-IDF matrix to a dense NumPy array
# NOTE(review): the dense array has one column per vocabulary term and may be
# very large - confirm it fits in memory.
X = X.toarray()

# Now, X is a NumPy array containing numerical features for clustering
print(X.shape)  # Shape of X: (number_of_documents, number_of_features)
# Prints the full feature row of the second document
print(X[1])

In [None]:
import torch.nn as nn

class CD_Autoencoder(nn.Module):
    """Convolutional autoencoder with a linear clustering head on the latent code.

    The encoder applies three strided convolutions followed by a linear layer
    to ``latent_dim``. The decoder mirrors it with transposed convolutions, and
    the clustering head maps the latent code to ``n_clusters`` logits.

    The decoder used to hard-code ``output_padding=1`` on every transposed
    convolution, which always produces 32x32 outputs; 28x28 inputs were
    therefore not reconstructed at their own size. The output paddings and the
    flattened feature size are now derived from ``input_height`` and
    ``input_width`` so the reconstruction matches the input shape. For 32x32
    inputs the layers are identical to the previous definition.

    Args:
        device: Stored on the instance; not used by the layers themselves.
        n_clusters: Number of cluster logits.
        input_channels: Channels of the input images.
        input_height: Height of the input images.
        input_width: Width of the input images.
        latent_dim: Size of the latent code.
        negative_slope: Slope of every LeakyReLU.

    Raises:
        ValueError: If the input is too small for the three convolutions.
    """

    @staticmethod
    def _conv_out(size, kernel, stride, padding):
        """Output length of a convolution along one axis."""
        return (size + 2 * padding - kernel) // stride + 1

    @staticmethod
    def _output_padding(target, size, kernel, stride, padding):
        """Extra output rows/cols a transposed convolution needs to reach ``target``."""
        return target - ((size - 1) * stride - 2 * padding + kernel)

    def __init__(self, device, n_clusters, input_channels, input_height, input_width, latent_dim, negative_slope):
        super(CD_Autoencoder, self).__init__()
        self.device = device
        self.n_clusters = n_clusters
        self.input_channels = input_channels
        self.input_height = input_height
        self.input_width = input_width
        self.latent_dim = latent_dim
        self.negative_slope = negative_slope
        self.needsReshape = True

        # Spatial size after each encoder convolution: (height, width)
        size0 = (input_height, input_width)
        size1 = tuple(self._conv_out(s, 5, 2, 2) for s in size0)
        size2 = tuple(self._conv_out(s, 5, 2, 2) for s in size1)
        size3 = tuple(self._conv_out(s, 3, 2, 0) for s in size2)
        if min(size3) < 1:
            raise ValueError(
                "input of size {}x{} is too small for the encoder".format(input_height, input_width)
            )
        flat_features = 128 * size3[0] * size3[1]

        # Output padding that makes every decoder stage undo its encoder stage
        pad3 = tuple(self._output_padding(t, s, 3, 2, 0) for t, s in zip(size2, size3))
        pad2 = tuple(self._output_padding(t, s, 5, 2, 2) for t, s in zip(size1, size2))
        pad1 = tuple(self._output_padding(t, s, 5, 2, 2) for t, s in zip(size0, size1))

        # Encoder
        self.encoder_model = nn.Sequential(
            nn.Conv2d(self.input_channels, 32, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=0),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(128),

            nn.Flatten(start_dim=1),
            nn.Linear(flat_features, self.latent_dim, bias=True),
            nn.Tanh(),
            nn.BatchNorm1d(self.latent_dim),
        )

        # Clustering MLP - MLP Part from latent Dimension to Number of Clusters
        self.cluster_model = nn.Sequential(
            # Output Layer
            nn.Linear(self.latent_dim, self.n_clusters, bias=True),
        )

        # Decoder
        self.decoder_model = nn.Sequential(
            nn.Linear(self.latent_dim, flat_features, bias=True),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm1d(flat_features),
            nn.Unflatten(dim=1, unflattened_size=(128, size3[0], size3[1])),

            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=0, output_padding=pad3),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(64),

            nn.ConvTranspose2d(64, 32, kernel_size=5, stride=2, padding=2, output_padding=pad2),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(32),

            nn.ConvTranspose2d(32, self.input_channels, kernel_size=5, stride=2, padding=2, output_padding=pad1),
            nn.LeakyReLU(negative_slope=self.negative_slope, inplace=True),
            nn.BatchNorm2d(self.input_channels)
        )

    def forward(self, x):
        """Return ``(cluster_logits, decoded)`` for a batch of images ``x``."""
        # Forward pass through encoder
        encoded = self.encoder_model(x)

        # Forward pass through clustering layer
        cluster_logits = self.cluster_model(encoded)

        # Forward pass through decoder
        decoded = self.decoder_model(encoded)

        return cluster_logits, decoded
