In [None]:
import os
import random
from PIL import Image, ImageDraw, ImageFont

def sample_images_from_folders(base_folder, sample_size=5):
    sampled_images = []
    labels = []

    for folder_name in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, folder_name)
        if os.path.isdir(folder_path):
            images = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.endswith('.png')]
            sampled_images.extend(random.sample(images, min(sample_size, len(images))))
            labels.extend([folder_name] * min(sample_size, len(images)))

    return sampled_images, labels

def create_combined_image(image_paths, labels, output_path, group_size=5, images_per_row=10):
    # Load the first image to get size
    first_image = Image.open(image_paths[0])
    width, height = first_image.size

    groups_per_row = images_per_row // group_size
    rows = (len(image_paths) // images_per_row) + (1 if len(image_paths) % images_per_row != 0 else 0)
    combined_image = Image.new('RGB', (width * images_per_row, height * rows))
    draw = ImageDraw.Draw(combined_image)

    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except IOError:
        font = ImageFont.load_default()

    for index, (image_path, label) in enumerate(zip(image_paths, labels)):
        image = Image.open(image_path)
        x_offset = (index % images_per_row) * width
        y_offset = (index // images_per_row) * height
        combined_image.paste(image, (x_offset, y_offset))
        
        # Draw label and index
        text = f'{index}: {label}'
        text_position = (x_offset + 5, y_offset + 5)
        draw.text(text_position, text, (255, 255, 255), font=font)

    combined_image.save(output_path)
    combined_image.show()

def main(base_folder, output_path):
    sampled_images, labels = sample_images_from_folders(base_folder)
    create_combined_image(sampled_images, labels, output_path)

# Define the paths
base_folder = '/data1/dxw_data/llm/Multimodal-MKT/model/output_cluster_imagebind100'
output_path = '/data1/dxw_data/llm/Multimodal-MKT/combined_image.png'

# Run the main function
main(base_folder, output_path)


In [1]:
import os
import random
from PIL import Image, ImageDraw, ImageFont

# Top 3 Categories for each month
top_3_categories = [
    [1, 17, 9], [9, 17, 6], [4, 17, 9], [4, 6, 17], 
    [4, 6, 0], [19, 4, 9], [11, 17, 4], [17, 10, 11], 
    [9, 17, 2], [17, 9, 1], [9, 17, 1], [1, 18, 17]
]

def sample_images_from_folders(base_folder, sample_size=5):
    sampled_images = []
    labels = []

    for category_list in top_3_categories:
        for category in category_list:
            folder_name = str(category)
            folder_path = os.path.join(base_folder, folder_name)
            if os.path.isdir(folder_path):
                images = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.endswith('.png')]
                sampled_images.extend(random.sample(images, min(sample_size, len(images))))
                labels.extend([folder_name] * min(sample_size, len(images)))

    return sampled_images, labels

def create_combined_image(image_paths, labels, output_path, group_size=5, images_per_row=15, months=12):
    # Load the first image to get size
    first_image = Image.open(image_paths[0])
    width, height = first_image.size

    rows = months
    combined_image = Image.new('RGB', (width * images_per_row, height * rows), (0, 0, 0))
    draw = ImageDraw.Draw(combined_image)

    try:
        font = ImageFont.truetype("arial.ttf", 32)
    except IOError:
        font = ImageFont.load_default()

    for row in range(rows):
        # Draw month number with black background
        month_text = f'{row + 1} Month'
        text_position = (5, row * height + 5)
        draw.text(text_position, month_text, (255, 255, 255), font=font)

    for index, (image_path, label) in enumerate(zip(image_paths, labels)):
        image = Image.open(image_path)
        x_offset = (index % images_per_row) * width + width
        y_offset = (index // images_per_row) * height
        combined_image.paste(image, (x_offset, y_offset))
        
        # Draw label and index
        text = f'{index % group_size + 1}: {label}'
        text_position = (x_offset + 5, y_offset + 5)
        draw.text(text_position, text, (255, 255, 255), font=font)

    combined_image.save(output_path)

def main(base_folder, output_path):
    sampled_images, labels = sample_images_from_folders(base_folder)
    create_combined_image(sampled_images, labels, output_path)

# Define the paths
base_folder = '/data1/dxw_data/llm/redbook_final/script_next/output_cluster_imagebind_cloth'
output_path = '/data1/dxw_data/llm/Multimodal-MKT/model/combined_image_monthtop3-3.png'

# Run the main function
main(base_folder, output_path)


Post ID 652fb4a8000000001e02c024 not found in CSV file
Post ID 658ec3650000000012006fde not found in CSV file
Post ID 656016af000000003203b36c not found in CSV file
Post ID 653b815100000000040396e4 not found in CSV file
Combined image saved to /data1/dxw_data/llm/redbook_final/predict_output/combined_image.png


In [12]:
import os
import random
from PIL import Image, ImageDraw, ImageFont

# Updated Top 3 Categories for each month
top_3_categories = [
    [64, 68, 14], [73, 31, 37], [74, 51, 94], [18, 44, 29], 
    [29, 74, 66], [17, 38, 84], [84, 76, 8], [14, 17, 82], 
    [19, 14, 63], [3, 45, 64], [68, 71, 64], [64, 68, 14]
]

def sample_images_from_folders(base_folder, sample_size=5):
    sampled_images = []
    labels = []

    for category_list in top_3_categories:
        for category in category_list:
            folder_name = str(category)
            folder_path = os.path.join(base_folder, folder_name)
            if os.path.isdir(folder_path):
                images = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.endswith('.png')]
                sampled_images.extend(random.sample(images, min(sample_size, len(images))))
                labels.extend([folder_name] * min(sample_size, len(images)))

    return sampled_images, labels

def create_combined_image(image_paths, labels, output_path, group_size=5, images_per_row=15, months=12):
    # Load the first image to get size
    first_image = Image.open(image_paths[0])
    width, height = first_image.size

    rows = months
    combined_image = Image.new('RGB', (width * images_per_row, height * rows), (0, 0, 0))
    draw = ImageDraw.Draw(combined_image)

    try:
        font = ImageFont.truetype("arial.ttf", 32)
    except IOError:
        font = ImageFont.load_default()

    for row in range(rows):
        # Draw month number with black background
        month_text = f'{row + 1} Month'
        text_position = (5, row * height + 5)
        draw.text(text_position, month_text, (255, 255, 255), font=font)

    for index, (image_path, label) in enumerate(zip(image_paths, labels)):
        image = Image.open(image_path)
        x_offset = (index % images_per_row) * width + width
        y_offset = (index // images_per_row) * height
        combined_image.paste(image, (x_offset, y_offset))
        
        # Draw label and index
        text = f'{index % group_size + 1}: {label}'
        text_position = (x_offset + 5, y_offset + 5)
        draw.text(text_position, text, (255, 255, 255), font=font)

    combined_image.save(output_path)

def main(base_folder, output_path):
    sampled_images, labels = sample_images_from_folders(base_folder)
    create_combined_image(sampled_images, labels, output_path)

# Define the paths
base_folder = '/data1/dxw_data/llm/redbook_final/script_next/output_cluster_imagebind_cloth'
output_path = '/data1/dxw_data/llm/Multimodal-MKT/model/proportion_cloth.png'

# Run the main function
main(base_folder, output_path)


In [4]:
import os
import pandas as pd
from PIL import Image
import shutil

# Define paths
csv_file_path = '/data1/dxw_data/llm/redbook_final/script_next/rawdata_20%.csv'
data_img_folder = '/data1/dxw_data/llm/redbook_final/script_next/data_img_20%'
predict_output_folder = '/data1/dxw_data/llm/redbook_final/predict_output'

# Ensure predict_output_folder exists
os.makedirs(predict_output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# List of post_ids from the table
post_ids = [
    "655c80b000000000330093c0", "6562b917000000003300563e", "653fec5f000000001f034cce",
    "65840a5a000000000602b98c", "6540ff7c0000000025020b89", "652d2f0e000000001a01540a",
    "6531fda1000000001f03f950", "653a1632000000001f007a20", "6549b6690000000025008fb1",
    "653ce06c00000000250099fe", "6553966d000000000f02ba6c", "657844db000000000801fb87",
    "6544b4d6000000001f03fcb8", "654b85f7000000003103ed3f", "6537a2e8000000002202fe5f",
    "658648370000000009023b33", "6548b4e3000000001d015b26", "652f8c70000000001c016d99",
    "658c1ec2000000001000c781", "6512dbff000000001e02221e", "653bb723000000002201d7fa",
    "65857227000000000901cdb5"
]

# Function to find the image file
def find_image_file(post_date, poster_id, post_id):
    pattern = f"{post_date}_{poster_id}_{post_id}"
    for file_name in os.listdir(data_img_folder):
        if file_name.startswith(pattern):
            return file_name
    return None

# Collect image paths and IDs
image_paths = []
combined_post_ids = []
for post_id in post_ids:
    matching_rows = df[df['post_id'] == post_id]
    if not matching_rows.empty:
        row = matching_rows.iloc[0]
        post_date = row['post_date']
        poster_id = row['poster_id']
        image_file = find_image_file(post_date, poster_id, post_id)
        if image_file:
            source_path = os.path.join(data_img_folder, image_file)
            destination_path = os.path.join(predict_output_folder, image_file)
            shutil.copy(source_path, destination_path)
            image_paths.append(destination_path)
            combined_post_ids.append(post_id)
    else:
        print(f"Post ID {post_id} not found in CSV file")

# Ignore the last image to keep only 21 images
image_paths = image_paths[:21]

# Function to combine images in 3 rows and 7 columns
def combine_images_grid(image_paths, output_path, rows=3, cols=7):
    images = [Image.open(image_path) for image_path in image_paths]
    widths, heights = zip(*(image.size for image in images))
    
    max_width = max(widths)
    max_height = max(heights)
    
    total_width = cols * max_width
    total_height = rows * max_height
    
    combined_image = Image.new('RGB', (total_width, total_height))
    
    for i, img in enumerate(images):
        row = i // cols
        col = i % cols
        x_offset = col * max_width
        y_offset = row * max_height
        combined_image.paste(img, (x_offset, y_offset))
    
    combined_image.save(output_path)

# Combine the images and save
if image_paths:
    output_image_path = '/data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png' # classification
    combine_images_grid(image_paths, output_image_path)
    print(f"Combined image saved to {output_image_path}")
    print(f"Combined images with Post IDs: {', '.join(combined_post_ids)}")
else:
    print("No images found to combine")


  df = pd.read_csv(csv_file_path)


Combined image saved to /data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png
Combined images with Post IDs: 655c80b000000000330093c0, 6562b917000000003300563e, 653fec5f000000001f034cce, 65840a5a000000000602b98c, 6540ff7c0000000025020b89, 652d2f0e000000001a01540a, 6531fda1000000001f03f950, 653a1632000000001f007a20, 6549b6690000000025008fb1, 653ce06c00000000250099fe, 6553966d000000000f02ba6c, 657844db000000000801fb87, 6544b4d6000000001f03fcb8, 654b85f7000000003103ed3f, 6537a2e8000000002202fe5f, 658648370000000009023b33, 6548b4e3000000001d015b26, 652f8c70000000001c016d99, 658c1ec2000000001000c781, 6512dbff000000001e02221e, 653bb723000000002201d7fa, 65857227000000000901cdb5


In [1]:
import os
import pandas as pd
from PIL import Image
import shutil

# Define paths
csv_file_path = '/data1/dxw_data/llm/redbook_final/script_next/rawdata_20%.csv'
data_img_folder = '/data1/dxw_data/llm/redbook_final/script_next/data_img_20%'
predict_output_folder = '/data1/dxw_data/llm/redbook_final/predict_output'

# Ensure predict_output_folder exists
os.makedirs(predict_output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# List of post_ids from the table
# List of post_ids from the table
# List of post_ids from the table
post_ids = [
    "655c80b000000000330093c0", "6562b917000000003300563e", "653fec5f000000001f034cce",
    "65840a5a000000000602b98c", "6540ff7c0000000025020b89", "652d2f0e000000001a01540a",
]


# Function to find the image file
def find_image_file(post_date, poster_id, post_id):
    pattern = f"{post_date}_{poster_id}_{post_id}"
    for file_name in os.listdir(data_img_folder):
        if file_name.startswith(pattern):
            return file_name
    return None

# Collect image paths and IDs
image_paths = []
combined_post_ids = []
for post_id in post_ids:
    matching_rows = df[df['post_id'] == post_id]
    if not matching_rows.empty:
        row = matching_rows.iloc[0]
        post_date = row['post_date']
        poster_id = row['poster_id']
        image_file = find_image_file(post_date, poster_id, post_id)
        if image_file:
            source_path = os.path.join(data_img_folder, image_file)
            destination_path = os.path.join(predict_output_folder, image_file)
            shutil.copy(source_path, destination_path)
            image_paths.append(destination_path)
            combined_post_ids.append(post_id)
    else:
        print(f"Post ID {post_id} not found in CSV file")

# Function to combine images horizontally
def combine_images_horizontally(image_paths, output_path):
    images = [Image.open(image_path) for image_path in image_paths]
    widths, heights = zip(*(image.size for image in images))
    
    total_width = sum(widths)
    max_height = max(heights)
    
    combined_image = Image.new('RGB', (total_width, max_height))
    
    x_offset = 0
    for img in images:
        combined_image.paste(img, (x_offset, 0))
        x_offset += img.width
    
    combined_image.save(output_path)

# Combine the images and save
if image_paths:
    output_image_path = '/data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png' # classification
    combine_images_horizontally(image_paths, output_image_path)
    print(f"Combined image saved to {output_image_path}")
    print(f"Combined images with Post IDs: {', '.join(combined_post_ids)}")
else:
    print("No images found to combine")


  df = pd.read_csv(csv_file_path)


Combined image saved to /data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png
Combined images with Post IDs: 655c80b000000000330093c0, 6562b917000000003300563e, 653fec5f000000001f034cce, 65840a5a000000000602b98c, 6540ff7c0000000025020b89, 652d2f0e000000001a01540a


In [1]:
import os
import pandas as pd
from PIL import Image
import shutil

# Define paths
csv_file_path = '/data1/dxw_data/llm/redbook_final/script_next/rawdata_20%.csv'
data_img_folder = '/data1/dxw_data/llm/redbook_final/script_next/data_img_20%_segcloth_background_combineall'
predict_output_folder = '/data1/dxw_data/llm/redbook_final/predict_output'

# Ensure predict_output_folder exists
os.makedirs(predict_output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# List of post_ids from the table
# List of post_ids from the table
# List of post_ids from the table
post_ids = [
    "65540a6c000000001703445c", "656bfa48000000003802ba35", "6505c1d6000000001f03c96e",
    "657537b1000000000700b490", "65377db50000000025020708", "6579b723000000000801c8b2","658ba217000000000f01eaa6",
]


# Function to find the image file
def find_image_file(post_date, poster_id, post_id):
    pattern = f"{post_date}_{poster_id}_{post_id}"
    for file_name in os.listdir(data_img_folder):
        if file_name.startswith(pattern):
            return file_name
    return None

# Collect image paths and IDs
image_paths = []
combined_post_ids = []
for post_id in post_ids:
    matching_rows = df[df['post_id'] == post_id]
    if not matching_rows.empty:
        row = matching_rows.iloc[0]
        post_date = row['post_date']
        poster_id = row['poster_id']
        image_file = find_image_file(post_date, poster_id, post_id)
        if image_file:
            source_path = os.path.join(data_img_folder, image_file)
            destination_path = os.path.join(predict_output_folder, image_file)
            shutil.copy(source_path, destination_path)
            image_paths.append(destination_path)
            combined_post_ids.append(post_id)
    else:
        print(f"Post ID {post_id} not found in CSV file")

# Function to combine images horizontally
def combine_images_horizontally(image_paths, output_path):
    images = [Image.open(image_path) for image_path in image_paths]
    widths, heights = zip(*(image.size for image in images))
    
    total_width = sum(widths)
    max_height = max(heights)
    
    combined_image = Image.new('RGB', (total_width, max_height))
    
    x_offset = 0
    for img in images:
        combined_image.paste(img, (x_offset, 0))
        x_offset += img.width
    
    combined_image.save(output_path)

# Combine the images and save
if image_paths:
    output_image_path = '/data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png' # classification
    combine_images_horizontally(image_paths, output_image_path)
    print(f"Combined image saved to {output_image_path}")
    print(f"Combined images with Post IDs: {', '.join(combined_post_ids)}")
else:
    print("No images found to combine")


  df = pd.read_csv(csv_file_path)


Combined image saved to /data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-classification.png
Combined images with Post IDs: 65540a6c000000001703445c, 656bfa48000000003802ba35, 6505c1d6000000001f03c96e, 657537b1000000000700b490, 65377db50000000025020708, 6579b723000000000801c8b2, 658ba217000000000f01eaa6


In [2]:
import os
import pandas as pd
from PIL import Image
import shutil

# Define paths
csv_file_path = '/data1/dxw_data/llm/redbook_final/script_next/rawdata_20%.csv'
data_img_folder = '/data1/dxw_data/llm/redbook_final/script_next/data_img_20%_segcloth_background_combineall'
predict_output_folder = '/data1/dxw_data/llm/redbook_final/predict_output'

# Ensure predict_output_folder exists
os.makedirs(predict_output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# List of post_ids from the table
# List of post_ids from the table
# List of post_ids from the table
post_ids = [
    "658c052d000000001101fb76", "65741188000000001502d02e", "6505c1d6000000001f03c96e",
    "652aa156000000001e033d15", 
]


# Function to find the image file
def find_image_file(post_date, poster_id, post_id):
    pattern = f"{post_date}_{poster_id}_{post_id}"
    for file_name in os.listdir(data_img_folder):
        if file_name.startswith(pattern):
            return file_name
    return None

# Collect image paths and IDs
image_paths = []
combined_post_ids = []
for post_id in post_ids:
    matching_rows = df[df['post_id'] == post_id]
    if not matching_rows.empty:
        row = matching_rows.iloc[0]
        post_date = row['post_date']
        poster_id = row['poster_id']
        image_file = find_image_file(post_date, poster_id, post_id)
        if image_file:
            source_path = os.path.join(data_img_folder, image_file)
            destination_path = os.path.join(predict_output_folder, image_file)
            shutil.copy(source_path, destination_path)
            image_paths.append(destination_path)
            combined_post_ids.append(post_id)
    else:
        print(f"Post ID {post_id} not found in CSV file")

# Function to combine images horizontally
def combine_images_horizontally(image_paths, output_path):
    images = [Image.open(image_path) for image_path in image_paths]
    widths, heights = zip(*(image.size for image in images))
    
    total_width = sum(widths)
    max_height = max(heights)
    
    combined_image = Image.new('RGB', (total_width, max_height))
    
    x_offset = 0
    for img in images:
        combined_image.paste(img, (x_offset, 0))
        x_offset += img.width
    
    combined_image.save(output_path)

# Combine the images and save
if image_paths:
    output_image_path = '/data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-regression.png' # classification
    combine_images_horizontally(image_paths, output_image_path)
    print(f"Combined image saved to {output_image_path}")
    print(f"Combined images with Post IDs: {', '.join(combined_post_ids)}")
else:
    print("No images found to combine")


  df = pd.read_csv(csv_file_path)


Combined image saved to /data1/dxw_data/llm/redbook_final/predict_output/combined_image_body-regression.png
Combined images with Post IDs: 658c052d000000001101fb76, 65741188000000001502d02e, 6505c1d6000000001f03c96e, 652aa156000000001e033d15
