In [5]:
import pandas as pd

# Read the raw product catalogue from 'fashion.csv' (resolved relative to the
# notebook's working directory) into the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer='fashion.csv')

In [6]:
# Preview the top of the table: the first five rows, all columns.
df.head(n=5)

Unnamed: 0,ProductId,Gender,Category,SubCategory,ProductType,Colour,Usage,ProductTitle,Image,ImageURL
0,42419,Girls,Apparel,Topwear,Tops,White,Casual,Gini and Jony Girls Knit White Top,42419.jpg,http://assets.myntassets.com/v1/images/style/p...
1,34009,Girls,Apparel,Topwear,Tops,Black,Casual,Gini and Jony Girls Black Top,34009.jpg,http://assets.myntassets.com/v1/images/style/p...
2,40143,Girls,Apparel,Topwear,Tops,Blue,Casual,Gini and Jony Girls Pretty Blossom Blue Top,40143.jpg,http://assets.myntassets.com/v1/images/style/p...
3,23623,Girls,Apparel,Topwear,Tops,Pink,Casual,Doodle Kids Girls Pink I love Shopping Top,23623.jpg,http://assets.myntassets.com/v1/images/style/p...
4,47154,Girls,Apparel,Bottomwear,Capris,Black,Casual,Gini and Jony Girls Black Capris,47154.jpg,http://assets.myntassets.com/v1/images/style/p...


In [7]:
# Print a concise summary of the dataframe: the index range, each column's
# name, non-null count and dtype, plus the approximate memory usage.
# Useful here to confirm that no column contains missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2906 entries, 0 to 2905
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ProductId     2906 non-null   int64 
 1   Gender        2906 non-null   object
 2   Category      2906 non-null   object
 3   SubCategory   2906 non-null   object
 4   ProductType   2906 non-null   object
 5   Colour        2906 non-null   object
 6   Usage         2906 non-null   object
 7   ProductTitle  2906 non-null   object
 8   Image         2906 non-null   object
 9   ImageURL      2906 non-null   object
dtypes: int64(1), object(9)
memory usage: 227.2+ KB


In [8]:
# Get the dimensions of the dataframe as a (rows, columns) tuple.
df.shape

(2906, 10)

In [9]:
# Tally how many products fall into each Category, most frequent first.
df['Category'].value_counts(sort=True, ascending=False)

Category
Footwear    1580
Apparel     1326
Name: count, dtype: int64

In [10]:
# --- 1. Select a reproducible random sample of products ---
# SAMPLE_SIZE is the number of products we would like to keep. The request is
# capped at len(df) because df.sample() draws without replacement by default
# and raises ValueError when n is larger than the number of rows available.
SAMPLE_SIZE = 100
# random_state pins the RNG so the same rows are drawn on every run.
sample_df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)

# --- 2. Create the new, clean DataFrame ---
# Keep only the columns we need, rename them, and add the image path that
# matches our project's structure ('static/images/<file name>').
# The result keeps sample_df's index, so the assigned path column aligns row
# by row with the selected products.
final_df = (
    sample_df[['ProductId', 'ProductTitle', 'Category']]
    .rename(columns={
        'ProductId': 'id',
        'ProductTitle': 'product_name',
        'Category': 'category',
    })
    .assign(image_path='static/images/' + sample_df['Image'])
)

# --- 3. Save the new DataFrame to products.csv ---
# index=False prevents pandas from writing row numbers into the file.
final_df.to_csv('products.csv', index=False)

# Report the number of rows actually written rather than the requested size,
# so the message stays truthful when the source has fewer rows than SAMPLE_SIZE.
print(f"'products.csv' file created successfully with {len(final_df)} products!")
print("\nNow, you need to copy the following image files to your 'static/images' folder:")

# --- 4. Print the list of image files you need to copy ---
for image_file in sample_df['Image']:
    print(image_file)

'products.csv' file created successfully with 100 products!

Now, you need to copy the following image files to your 'static/images' folder:
38326.jpg
35883.jpg
8417.jpg
10294.jpg
3160.jpg
20777.jpg
42087.jpg
31100.jpg
15570.jpg
4201.jpg
4204.jpg
24454.jpg
39332.jpg
19124.jpg
8071.jpg
54110.jpg
10097.jpg
35440.jpg
36730.jpg
2963.jpg
4184.jpg
4325.jpg
12705.jpg
2727.jpg
40924.jpg
5691.jpg
15714.jpg
47191.jpg
11856.jpg
34633.jpg
22162.jpg
24455.jpg
33840.jpg
58329.jpg
4202.jpg
23247.jpg
35434.jpg
6628.jpg
9052.jpg
16163.jpg
43883.jpg
57304.jpg
42732.jpg
23848.jpg
42026.jpg
39002.jpg
13682.jpg
44202.jpg
36216.jpg
56679.jpg
20805.jpg
39310.jpg
5405.jpg
30730.jpg
26459.jpg
40137.jpg
58459.jpg
44497.jpg
45371.jpg
2722.jpg
38652.jpg
13473.jpg
31948.jpg
6470.jpg
2703.jpg
5427.jpg
49461.jpg
41755.jpg
26598.jpg
38336.jpg
56961.jpg
33273.jpg
16733.jpg
34029.jpg
39001.jpg
37378.jpg
41752.jpg
2705.jpg
38985.jpg
5600.jpg
17411.jpg
31102.jpg
26562.jpg
56879.jpg
54542.jpg
32813.jpg
35999.jpg
32853.jpg

In [13]:
import os


def _preview(names, limit=5):
    """Print the first `limit` entries of `names`, one entry per line."""
    for entry in names[:limit]:
        print(entry)


# --- 1. Filenames the sampled CSV rows expect us to find ---
files_to_copy = sample_df['Image'].tolist()
print("--- We are LOOKING FOR files like this: ---")
_preview(files_to_copy)
print("-" * 40)


# --- 2. Filenames that are actually present in the single 'img' folder ---
source_dir = 'img'
print(f"\n--- We are FINDING files in the '{source_dir}' folder like this: ---")
try:
    # Every entry name in the directory, in whatever order the OS returns them.
    all_files_in_dir = os.listdir(source_dir)
except FileNotFoundError:
    print(f"Error: The directory '{source_dir}' was not found. Please make sure the name is correct.")
else:
    # Show a handful of the names found, for side-by-side comparison with the list above.
    _preview(all_files_in_dir)

print("-" * 40)

--- We are LOOKING FOR files like this: ---
38326.jpg
35883.jpg
8417.jpg
10294.jpg
3160.jpg
----------------------------------------

--- We are FINDING files in the 'img' folder like this: ---
10037.jpg
10039.jpg
10054.jpg
10096.jpg
10097.jpg
----------------------------------------


In [14]:
import os
import shutil

# --- 1. Define source and destination ---
source_dir = 'img'
destination_dir = 'static/images'

# --- 2. Get the list of filenames we need from our sample DataFrame ---
files_to_copy = list(sample_df['Image'])

# --- 3. Ensure the destination folder exists ---
# exist_ok=True makes this cell safe to re-run after the folder was created.
os.makedirs(destination_dir, exist_ok=True)

# --- 4. Loop through our list and copy each file ---
# The count in the message comes from the list itself, so it stays correct
# if the sample size ever changes.
print(f"Starting to copy your {len(files_to_copy)} sample images...")
copied_count = 0
for filename in files_to_copy:
    source_path = os.path.join(source_dir, filename)
    destination_path = os.path.join(destination_dir, filename)

    # Use isfile rather than exists: a directory that happens to share the
    # name would pass an exists() check and then make shutil.copy raise
    # instead of being reported as a missing image.
    if os.path.isfile(source_path):
        shutil.copy(source_path, destination_path)
        copied_count += 1
    else:
        print(f"Warning: Could not find file {filename} in {source_dir}")

print(f"\nDone! ✅ Successfully copied {copied_count} out of {len(files_to_copy)} images.")

Starting to copy your 100 sample images...

Done! ✅ Successfully copied 100 out of 100 images.
