In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns
import pandas as pd
plt.ion()

device = ("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
with open('./Dataset/list_attribute.txt', 'r') as file:
    lines = file.readlines()

# Extract the column names from the first line and remove any leading/trailing whitespace
column_names = lines[0].strip().split()

# Initialize an empty list to store the data rows
data_rows = []

# Iterate over the lines starting from the second line
for line in lines[1:]:
    # Split the line on whitespace
    data = line.strip().split()

    # Get the filename
    filename = data[0]

    # Create a list of values excluding the filename
    values = [int(value) if value != '-' else None for value in data[1:]]

    # Adjust the number of values to match the expected number of columns
    if len(values) < len(column_names) - 1:
        values += [None] * (len(column_names) - 1 - len(values))
    elif len(values) > len(column_names) - 1:
        values = values[:len(column_names) - 1]

    # Append the filename and values as a row
    data_rows.append([filename] + values)

# Create a DataFrame from the list of data rows
df = pd.DataFrame(data_rows, columns=column_names)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202599 entries, 0 to 202598
Data columns (total 41 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   image_id             202599 non-null  object
 1   5_o_Clock_Shadow     202599 non-null  int64 
 2   Arched_Eyebrows      202599 non-null  int64 
 3   Attractive           202599 non-null  int64 
 4   Bags_Under_Eyes      202599 non-null  int64 
 5   Bald                 202599 non-null  int64 
 6   Bangs                202599 non-null  int64 
 7   Big_Lips             202599 non-null  int64 
 8   Big_Nose             202599 non-null  int64 
 9   Black_Hair           202599 non-null  int64 
 10  Blond_Hair           202599 non-null  int64 
 11  Blurry               202599 non-null  int64 
 12  Brown_Hair           202599 non-null  int64 
 13  Bushy_Eyebrows       202599 non-null  int64 
 14  Chubby               202599 non-null  int64 
 15  Double_Chin          202599 non-nu

In [6]:
df

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202594,202595.jpg,-1,-1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
202595,202596.jpg,-1,-1,-1,-1,-1,1,1,-1,-1,...,-1,1,1,-1,-1,-1,-1,-1,-1,1
202596,202597.jpg,-1,-1,-1,-1,-1,-1,-1,-1,1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
202597,202598.jpg,-1,1,1,-1,-1,-1,1,-1,1,...,-1,1,-1,1,1,-1,1,-1,-1,1


In [7]:
df.to_csv('list_attribute.csv', index=False)

In [8]:
images = './Dataset/images'

In [10]:
df['ImageExists'] = df['image_id'].apply(lambda x: os.path.exists(os.path.join(images, x)))

In [11]:
df = df[df['ImageExists']]

In [12]:
df.drop('ImageExists', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop('ImageExists', axis=1, inplace=True)


In [13]:
df

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
50,000051.jpg,1,-1,-1,1,1,-1,-1,1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
51,000052.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,-1,1,-1,-1,-1,-1,-1,-1,-1
64,000065.jpg,-1,-1,-1,-1,-1,-1,-1,-1,1,...,-1,1,1,-1,-1,-1,-1,-1,-1,1
165,000166.jpg,1,-1,-1,-1,-1,-1,-1,1,-1,...,1,-1,-1,-1,-1,1,-1,-1,-1,-1
197,000198.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202319,202320.jpg,-1,1,-1,-1,-1,-1,-1,-1,-1,...,-1,1,-1,1,-1,-1,1,-1,-1,1
202339,202340.jpg,-1,-1,1,-1,-1,-1,-1,-1,1,...,-1,1,1,-1,-1,-1,1,-1,-1,1
202346,202347.jpg,-1,1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,1,-1,-1,1,-1,-1,1
202356,202357.jpg,-1,-1,1,-1,-1,-1,1,-1,-1,...,-1,-1,1,1,-1,-1,1,1,-1,1


In [33]:
df.to_csv('image_attributes_filtered.csv', index=False)