In [9]:
import os
import csv
import pandas as pd
from torchvision import transforms
from PIL import Image

In [10]:
folder = "Downloads/weather"
# Directory that holds the image files. The path is relative, so it is
# resolved against the notebook's current working directory.

# Data preprocessing

In [11]:
# build a DataFrame of image file names and their labels

# assign label
def assign_label(nome_file):
    """Return the label encoded as the leading text of an image file name.

    The name is compared (case-sensitively) against the known prefixes in
    order; the first prefix that matches is returned as the label. Names that
    match none of them are labelled ``"other"``.
    """
    known_labels = ("rain", "cloudy", "shine", "sunrise")
    return next(
        (candidate for candidate in known_labels if nome_file.startswith(candidate)),
        "other",
    )

# Collect (file name, label) pairs for every JPEG file in `folder`.
# The suffix test must be given whole extensions: `tuple(".jpg")` expands to
# ('.', 'j', 'p', 'g'), which accepts any name ending in one of those single
# characters (for example "notes.png" or "archive.zip").
# Matching is case-insensitive and covers both common JPEG extensions.
image_extensions = (".jpg", ".jpeg")

dati = [
    (x, assign_label(x))
    for x in os.listdir(folder)
    if x.lower().endswith(image_extensions)
]

# One row per image: the file name and the label derived from it.
df = pd.DataFrame(dati, columns=["image", "label"])



In [12]:
df.head()

Unnamed: 0,image,label
0,cloudy126.jpg,cloudy
1,rain20.jpg,rain
2,rain34.jpg,rain
3,cloudy132.jpg,cloudy
4,sunrise33.jpg,sunrise


In [13]:
# Size every image is resized to before it is given to the CNN.
input_dim = (224,224)
# Directory path under `folder`. NOTE: a later cell rebinds input_dir_cnn to
# output_dir before any image is saved, so nothing is written to this path.
input_dir_cnn = folder + "/images/input_images_cnn"


In [14]:
# Create the directory (including missing parents); exist_ok makes re-running
# this cell harmless when the directory is already there.
os.makedirs(input_dir_cnn, exist_ok = True)


In [15]:
# Preprocessing pipeline applied to each image: a single resize to input_dim.
transformation_for_cnn_input = transforms.Compose([transforms.Resize(input_dim)])


In [16]:
# Directory the resized images are saved to (relative to the current working
# directory).
output_dir = 'Downloads/output_images_cnn'


In [17]:
folder = 'Downloads/weather'
# From here on the resized images are written to, and later read back from,
# output_dir.
input_dir_cnn = output_dir

os.makedirs(input_dir_cnn, exist_ok=True)

# Resize every regular file in `folder` and save it under the same file name.
for item in os.listdir(folder):
    item_path = os.path.join(folder, item)

    if not os.path.isfile(item_path):  # skip sub-directories
        continue

    try:
        source_image = Image.open(item_path)
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError: raised for files it
        # cannot read as an image. Report and move on instead of aborting.
        print(f"Skipping {item!r}: cannot be opened as an image ({exc})")
        continue

    # The context manager closes the opened file even if a later step raises,
    # and also when convert() returns a separate RGB copy.
    with source_image:
        if source_image.mode != 'RGB':  # convert to RGB (3 colour channels)
            rgb_image = source_image.convert('RGB')
        else:
            rgb_image = source_image

        resized_image = transformation_for_cnn_input(rgb_image)  # resize for the CNN

        # Pillow picks the output format from the extension of `item`.
        resized_image.save(os.path.join(input_dir_cnn, item))


In [18]:
import torch
from torchvision import models
from PIL import Image

In [19]:
class Img2VecResnet18():
    """Turn an image into a feature vector with a pretrained ResNet-18.

    The vector is the output of the network's ``avgpool`` layer, captured by a
    forward hook while the image is passed through the model.
    """

    def __init__(self):
        """Load the pretrained model on the CPU and set up the preprocessing."""
        self.device = torch.device("cpu")
        self.numberFeatures = 512  # channel count of the captured layer output
        self.modelName = "resnet-18"
        self.model, self.featureLayer = self.getFeatureLayer()
        self.model = self.model.to(self.device)
        self.model.eval()  # put the model in evaluation (inference) mode
        self.toTensor = transforms.ToTensor()
        # Per-channel mean/std for three-channel input (the statistics
        # commonly used with ImageNet-pretrained torchvision models).
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

    def getFeatureLayer(self):
        """Return ``(model, layer)``: the pretrained ResNet-18 and its avgpool layer.

        Also sets ``self.layer_output_size``.
        """
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is for compatibility with the
        # installed version.
        cnnModel = models.resnet18(pretrained=True)
        layer = cnnModel._modules.get('avgpool')
        self.layer_output_size = 512
        return cnnModel, layer

    def getVec(self, img):
        """Return the captured layer output for ``img`` as a 1-D NumPy array.

        ``img`` is converted with ``self.toTensor`` and normalised with
        three-channel statistics, so it is expected to be an RGB image. The
        result has ``self.numberFeatures`` elements.
        """
        image = self.normalize(self.toTensor(img)).unsqueeze(0).to(self.device)
        embedding = torch.zeros(1, self.numberFeatures, 1, 1)

        def copyData(m, i, o):
            # Forward hook: copy the layer's output into `embedding`.
            embedding.copy_(o.data)

        h = self.featureLayer.register_forward_hook(copyData)
        try:
            # Pure inference: no autograd graph is needed for the forward pass.
            with torch.no_grad():
                self.model(image)
        finally:
            # Always detach the hook, otherwise a failed forward pass would
            # leave it registered on the layer for every later call.
            h.remove()
        return embedding.numpy()[0, :, 0, 0]


    


In [20]:
import tqdm
from torchvision import models, transforms 

In [26]:
# Build the feature extractor; the constructor loads the pretrained ResNet-18.
img2vec = Img2VecResnet18()

In [27]:
# Maps image file name -> feature vector returned by img2vec.getVec.
allVectors = {}

In [28]:
# Compute one feature vector per file in `input_dir_cnn`, keyed by file name.
for image_name in os.listdir(input_dir_cnn):
    # `with` closes the image file even if feature extraction raises.
    with Image.open(os.path.join(input_dir_cnn, image_name)) as I:
        vec = img2vec.getVec(I)
        allVectors[image_name] = vec

In [36]:
# Absolute path of the current user's Downloads directory ('~' expanded).
# NOTE: `folder` and `output_dir` are relative "Downloads/..." paths, so they
# point inside this directory only when the working directory is the home
# directory.
data_folder = os.path.expanduser('~/Downloads')

In [37]:
# Transpose so that each image file name becomes a row and each feature a
# column, then write the table to CSV inside data_folder.
vectors_table = pd.DataFrame(allVectors).transpose()
vectors_table.to_csv(data_folder+'/input_data_vectors.csv')
