In [None]:
from tqdm import tqdm
import os

import PIL
from PIL import Image
import numpy as np
import json
import torch
from torch.utils.data import Dataset, DataLoader
import itertools
from torchvision import datasets, transforms, models
from einops import rearrange
from itertools import product
import math
import torchvision.models as models
import argparse
import torchvision
import random
# Single seed shared by every random number generator used in this notebook.
random_seed = 777

# Seed PyTorch, NumPy and the stdlib `random` module with the same value.
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(random_seed)

# Ask cuDNN to pick deterministic kernels.
torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False

In [None]:
# Fine-tuned checkpoint that is evaluated in the cells below.
weight = '/data/jong980812/project/mae/result_after_shapley/car_type/256_1e-3/OUT/01/checkpoint-99.pth'
# Padding mode written onto every Conv2d layer of the backbone below.
padding_mode='zeros'

# EfficientNet-B1 backbone; the final classifier layer is replaced by a 10-way head.
model=models.efficientnet_b1(pretrained=True,progress=False)
model.classifier[1] = torch.nn.Linear(1280, 10)
for name, layer in model.named_modules():
    if isinstance(layer, torch.nn.Conv2d):
        layer.padding_mode = padding_mode

# Load the checkpoint on CPU; the model is moved to `device` later, in the evaluation cell.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
checkpoint = torch.load(weight, map_location='cpu')
print("Load pre-trained checkpoint from: %s" % weight)
# The weights are stored under the 'model' key of the checkpoint dict.
checkpoint_model = checkpoint['model']
# NOTE(review): `state_dict` is not used anywhere in the visible notebook.
state_dict = model.state_dict()
# strict=False tolerates missing/unexpected keys; the printed `msg` lists them, so check it.
msg = model.load_state_dict(checkpoint_model, strict=False)
print(msg)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
class Shapley_part_car(Dataset):
    """Dataset yielding every present/absent combination of five car parts per image.

    Each sample pairs an image (path from the first CSV column, label from the
    last CSV column) with a JSON file of part bounding boxes. ``__getitem__``
    blacks out all annotated parts and then restores the subset selected by
    each of the 2**5 binary combinations.

    Args:
        data_folder: Directory that the CSV image paths are joined onto.
        anno_path: CSV file with a header row; first column is the image path,
            last column is the label.
        json_folder: Directory holding one ``<image stem>.json`` per image.
        transform: Callable turning a PIL image into a tensor. When ``None``,
            a plain CHW float conversion scaled to [0, 1] is used instead.
    """

    # Order matters: it defines the meaning of each position in a binary combination.
    PART_NAMES = ('door', 'glass', 'wheel', 'light', 'sideglass')
    # All on/off assignments, from (0, 0, 0, 0, 0) to (1, 1, 1, 1, 1).
    PART_COMBINATIONS = tuple(itertools.product([0, 1], repeat=5))

    def __init__(self, data_folder,anno_path, json_folder, transform=None):
        import pandas as pd  # local import: only needed to parse the annotation CSV

        self.json_folder = json_folder
        self.data_folder = data_folder
        self.anno_path = anno_path
        self.transform = transform

        # Read the annotation CSV into a DataFrame.
        df = pd.read_csv(self.anno_path,header=0,delimiter=',')

        # Last column -> labels, first column -> image paths.
        self.label_list = df.values[:,-1].tolist()
        self.image_list = df.values[:,0].tolist()
        # The JSON file name is the image file name up to its first dot, plus ".json".
        self.json_paths = [
            image_path.split('/')[-1].split('.')[0] + ".json"
            for image_path in self.image_list
        ]

    def __repr__(self) -> str:
        target_foler= self.data_folder
        number = len(self.image_list)
        return f'Standford_CAR: Shapley Part Dataset class\nTarget Folder:{target_foler}\nData Num:{number}'

    def get_part_json(self, json_file_path):
        '''
        Read the part boxes of one image.

        Returns a dict with one entry per name in ``PART_NAMES``: the list of
        ``points`` of every shape carrying that label, or ``None`` when the
        part is not annotated. Shapes with any other label are ignored instead
        of raising ``KeyError``.
        '''
        with open(json_file_path, 'r') as f:
            boxes = json.load(f)['shapes']

        part_json = {part: [] for part in self.PART_NAMES}
        for box in boxes:
            if box["label"] in part_json:
                part_json[box["label"]].append(box["points"])

        # Parts without any box are marked None so callers can treat them as absent.
        return {part: (points if points else None) for part, points in part_json.items()}

    def get_coords(self, part):
        '''
        Convert a list of two-corner boxes into ``[xmin, ymin, xmax, ymax]`` lists.

        Coordinates are truncated to int. The two corners may be given in any
        order; the result is always normalised so it is a valid crop/paste box.
        Returns ``None`` when ``part`` is ``None``; otherwise always a list of
        boxes, even for a single box.
        '''
        if part is None:
            return None

        extracted_coordinates = []
        for corners in part:
            x0, y0 = list(map(int, corners[0]))
            x1, y1 = list(map(int, corners[1]))
            extracted_coordinates.append([min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)])
        return extracted_coordinates

    def get_black_image(self,size):
        '''Return an all-black RGB image of the given ``(width, height)``.'''
        return Image.new("RGB", size, (0, 0, 0))

    def _to_tensor(self, pil_img):
        '''Apply ``self.transform``, or a plain CHW float [0, 1] conversion when it is None.'''
        if self.transform is not None:
            return self.transform(pil_img)
        return torch.from_numpy(np.array(pil_img)).permute(2, 0, 1).float().div(255)

    def _mask_boxes(self, canvas, crops, boxes):
        '''Cover every box on ``canvas`` with a black patch the size of its crop.'''
        for crop, box in zip(crops, boxes):
            canvas.paste(self.get_black_image(crop.size), box)

    def _restore_boxes(self, canvas, crops, boxes):
        '''Paste every original crop back onto ``canvas`` at its box.'''
        for crop, box in zip(crops, boxes):
            canvas.paste(crop, box)

    def get_empty_face(self,img, part_imgs, part_json):
        '''
        Legacy helper; not called by ``__getitem__``.

        Pastes the 'head' crop (hair included) on a black canvas, blacks out
        the 'eye', 'nose', 'mouth' and 'ear' boxes, and returns the head region
        together with its two corner points.
        NOTE(review): expects 'head'/'eye'/... keys, which ``get_part_json``
        does not produce for the car parts.
        '''
        head_coords = self.get_coords(part_json['head'])
        head = part_imgs['head'][0]

        canvas = self.get_black_image(img.size)
        canvas.paste(head,head_coords[0])

        for part in ['eye','nose','mouth','ear']:
            if part_json[part] is not None:
                part_coords= self.get_coords(part_json[part])
                part_img = part_imgs[part]
                for i in range(len(part_img)):
                    canvas.paste(self.get_black_image(part_img[i].size),part_coords[i])

        x0, y0, x1, y1 = head_coords[0]
        return canvas.crop(head_coords[0]), [[x0, y0], [x1, y1]]

    def get_empty_upper_body(self,img, part_imgs, part_json):
        '''
        Legacy helper; not called by ``__getitem__``.

        1. Paste the 'upper_body' crop on a white canvas.
        2. Cover the 'hand' boxes with white patches.
        3. Return the upper-body region and its two corner points.
        NOTE(review): expects 'upper_body'/'hand' keys, which ``get_part_json``
        does not produce for the car parts.
        '''
        canvas = Image.new("RGB", img.size, (255, 255, 255))
        upper_body_coords = self.get_coords(part_json['upper_body'])
        upper_body = part_imgs['upper_body'][0]
        canvas.paste(upper_body,upper_body_coords[0])

        if part_json["hand"] is not None:
            part_coords= self.get_coords(part_json["hand"])
            part_img = part_imgs["hand"]
            for i in range(len(part_img)):
                canvas.paste(Image.new("RGB", part_img[i].size, (255, 255, 255)),part_coords[i])

        x0, y0, x1, y1 = upper_body_coords[0]
        return canvas.crop(upper_body_coords[0]), [[x0, y0], [x1, y1]]

    def get_empty_lower_body(self,img, part_imgs, part_json):
        '''
        Legacy helper; not called by ``__getitem__``.

        Pastes the 'lower_body' crop on a black canvas, covers the 'foot' boxes
        with white patches and returns the lower-body region and its two
        corner points.
        NOTE(review): the canvas is black but the foot patches are white -
        confirm this mix is intended. Expects 'lower_body'/'foot' keys, which
        ``get_part_json`` does not produce for the car parts.
        '''
        canvas = self.get_black_image(img.size)
        lower_body_coords = self.get_coords(part_json['lower_body'])
        lower_body = part_imgs['lower_body'][0]
        canvas.paste(lower_body,lower_body_coords[0])

        if part_json["foot"] is not None:
            part_coords= self.get_coords(part_json["foot"])
            part_img = part_imgs["foot"]
            for i in range(len(part_img)):
                canvas.paste(Image.new("RGB", part_img[i].size, (255, 255, 255)),part_coords[i])

        x0, y0, x1, y1 = lower_body_coords[0]
        return canvas.crop(lower_body_coords[0]), [[x0, y0], [x1, y1]]

    def create_new_images(self,img, binary_combination, part_imgs,part_json):
        '''
        Build one masked copy of ``img`` for a single part combination.

        Args:
            img: Source PIL image (left unmodified).
            binary_combination: Five 0/1 flags ordered as ``PART_NAMES``
                (door, glass, wheel, light, sideglass); 1 keeps the part visible.
            part_imgs: Dict part name -> list of crops, aligned with the boxes.
            part_json: Dict part name -> raw box points or ``None``.

        Returns:
            A new PIL image in which every annotated part is blacked out
            except the active ones.
        '''
        door_active, glass_active, wheel_active, light_active, sideglass_active = binary_combination
        new_image = img.copy()
        # Resolve the boxes once per part instead of once per pasted patch.
        boxes = {part: self.get_coords(part_json[part]) for part in self.PART_NAMES}

        # Step 1: black out every annotated part.
        for part in self.PART_NAMES:
            if boxes[part] is not None:
                self._mask_boxes(new_image, part_imgs[part], boxes[part])

        # Step 2: restore the door first. Side glass usually lies on top of the
        # door, so it is masked again and only comes back if its own flag is set.
        if door_active and boxes['door'] is not None:
            self._restore_boxes(new_image, part_imgs['door'], boxes['door'])
            if boxes['sideglass'] is not None:
                self._mask_boxes(new_image, part_imgs['sideglass'], boxes['sideglass'])

        # Step 3: restore the remaining active parts, in the original order.
        remaining = (
            ('glass', glass_active),
            ('wheel', wheel_active),
            ('light', light_active),
            ('sideglass', sideglass_active),
        )
        for part, active in remaining:
            if active and boxes[part] is not None:
                self._restore_boxes(new_image, part_imgs[part], boxes[part])
        return new_image

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        '''
        Returns:
            new_imgs: Tensor stacking the 32 masked variants of the image,
                ordered as ``PART_COMBINATIONS``.
            original: The transformed, unmasked image.
            label: Label from the annotation CSV.
            img_path: Full path of the source image.
        '''
        img_path = os.path.join(self.data_folder,self.image_list[idx])
        label = self.label_list[idx]
        # Always work on an RGB copy (grayscale, palette and RGBA files would
        # otherwise break the 3-channel pipeline) and close the file handle.
        with Image.open(img_path) as opened:
            image = opened.convert('RGB')

        # Load the boxes of every part and cut the matching crops out of the image.
        part_json = self.get_part_json(os.path.join(self.json_folder,self.json_paths[idx]))
        part_imgs = {}
        for part in part_json.keys():
            coords = self.get_coords(part_json[part])
            if coords is None:
                # Placeholder kept for absent parts; it is never pasted.
                part_imgs[part] = [None]
            else:
                part_imgs[part] = [image.crop(box) for box in coords]

        new_imgs = [
            self._to_tensor(
                self.create_new_images(img=image, binary_combination=combination,
                                       part_imgs=part_imgs, part_json=part_json)
            )
            for combination in self.PART_COMBINATIONS
        ]
        new_imgs = torch.stack(new_imgs, dim=0)
        return new_imgs, self._to_tensor(image), label ,img_path

In [None]:
from shapley.transform import ThresholdTransform
# Model-input pipeline: fixed 224x224 resize, tensor conversion, then
# per-channel normalisation with the usual ImageNet mean/std.
transform_car = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Visual-inspection pipeline: larger 300x400 resize and no Normalize step
# (it was commented out in the original), so tensors can be turned back into images.
check = transforms.Compose(
    [
        transforms.Resize((300, 400)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Sanity check: look at a few masked variants of one test image.
check_dataset = Shapley_part_car(data_folder='/local_datasets/stanford_car/01/test/',anno_path='/data/jong980812/project/part_shapley/dataset/car/csv_files/cars_test.csv',
                                 json_folder='/data/jong980812/project/part_shapley/dataset/car/annotations',transform=check)
index=3
topil = transforms.ToPILImage()

# Fetch the sample once: every check_dataset[index] call rebuilds all 32 masked images.
check_imgs, check_original, check_label, check_path = check_dataset[index]

# Indices 1, 2, 4, 8, 16 are the combinations with exactly one part visible
# (sideglass, light, wheel, glass, door respectively).
for i in [1,2,4,8,16]:
    topil(check_imgs[i]).show()
# Show the unmasked image too (a bare expression in the middle of a cell displays nothing).
topil(check_original).show()
# The original printed the image path twice ([-1] and [3]); print label and path instead.
print(check_label)
print(check_path)


In [None]:
import matplotlib.pyplot as plt
import os
import json
from PIL import Image
import random

def shapley_class(part,part_count, task, class_name, num_correct, nb_data, save_path):
    """Plot, save and show how often each part had the highest Shapley value for one class.

    Args:
        part: Part names, in the same order as the values of ``part_count``.
        part_count: Dict whose i-th value is the number of samples won by part i.
        task: Task name used in the title and the output file name.
        class_name: Class name used in the title and the output file name.
        num_correct: Number of correctly classified samples.
        nb_data: Total number of samples; bar heights are counts divided by it.
        save_path: Directory for ``{task}_{class_name}.png`` (created if missing).
    """
    counts = list(part_count.values())
    # An empty class would otherwise raise ZeroDivisionError; plot zeros instead.
    denominator = nb_data if nb_data else 1
    data2 = {part[i]: counts[i] / denominator for i in range(len(part))}
    accuracy = num_correct / denominator * 100

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    plt.clf()
    plt.bar(list(data2.keys()), list(data2.values()))
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(f'{task} task  : {class_name} samples\n{num_correct}/{nb_data}={accuracy:.2f}%')
    plt.savefig(os.path.join(save_path,f'{task}_{class_name}.png'))
    plt.show()
    
    
def shapley_task(part,part_count_list, task, class_name, num_correct, nb_data, save_path):
    """Plot, save and show the per-part win frequency summed over all classes.

    Args:
        part: Part names, in the same order as the values of each count dict.
        part_count_list: One per-class count dict (see ``shapley_class``).
        task: Task name used in the title and the output file name.
        class_name: Unused; kept so existing callers keep working.
        num_correct: Correctly classified samples over all classes.
        nb_data: Total samples over all classes; bar heights are divided by it.
        save_path: Directory for ``{task}_total.png`` (created if missing).
    """
    # An empty evaluation would otherwise raise ZeroDivisionError; plot zeros instead.
    denominator = nb_data if nb_data else 1
    per_class_counts = [list(part_count.values()) for part_count in part_count_list]
    data2 = {
        part[i]: sum(counts[i] for counts in per_class_counts) / denominator
        for i in range(len(part))
    }
    accuracy = num_correct / denominator * 100

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    plt.clf()
    plt.bar(list(data2.keys()), list(data2.values()))
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(f'{task} task total  : \n{num_correct}/{nb_data}={accuracy:.2f}%')
    plt.savefig(os.path.join(save_path,f'{task}_total.png'))
    plt.show()

def representative_each_class(shapley_lists, best_part_list,task, class_names, json_path, n_show,save_path):
    '''
    For every class, save a grid comparing its most representative images
    with random images of the other classes.

    Row 0 shows the ``n_show`` images of the class with the highest Shapley
    value for the class's best part; each following row shows ``n_show``
    randomly drawn images from one of the other classes.

    shapley_lists: dict class name -> {image path: list of per-part Shapley values}
    best_part_list: dict class name -> index of the most frequent winning part
    task: task name, used in the output file name
    class_names: all class names
    json_path: unused; kept so existing callers keep working
    n_show: number of images per row
    save_path: output directory (created if missing)
    '''
    num_class = len(class_names)
    part= ['door','glass','wheel','light','sideglass']
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    def _draw(ax, image_path):
        # Load, resize and draw one image; the file handle is closed right away.
        with Image.open(image_path) as opened:
            ax.imshow(opened.resize((448,336)),cmap='gray')

    for class_index, class_name in enumerate(class_names):
        other_classes = [name for i, name in enumerate(class_names) if i != class_index]
        best_part = best_part_list[class_name]
        shapley_list = shapley_lists[class_name]
        # Image paths ranked by the Shapley value of the best part, highest first.
        ranked_paths = sorted(shapley_list, key=lambda path: shapley_list[path][best_part], reverse=True)[:n_show]

        # squeeze=False keeps `axes` two-dimensional even for one row or one column.
        fig, axes = plt.subplots(num_class, n_show,figsize=(30,80), squeeze=False)
        for i in range(num_class):
            for j in range(n_show):
                axes[i, j].axis('off')
                if i==0:
                    # A class may have fewer than n_show correct images: leave the cell blank.
                    if j < len(ranked_paths):
                        _draw(axes[i, j], ranked_paths[j])
                else:
                    candidates = list(shapley_lists[other_classes[i-1]].keys())
                    # A class without any correct prediction has nothing to sample from.
                    if candidates:
                        _draw(axes[i, j], random.choice(candidates))

        axes[0, 0].set_title(f'Best {class_name} - {part[best_part]}', fontsize=20,ha='center')
        for i in range(1, num_class):
            axes[i, 0].set_title(f'{other_classes[i-1]} - {part[best_part]}', fontsize=20)

        fig.subplots_adjust(wspace=0.1, hspace=0.1)
        fig.savefig(os.path.join(save_path,f'{task}_{class_name}_{part[best_part]}_vs_another.png'))
        plt.show()
        # The figures are large (30x80 in); release each one before building the next.
        plt.close(fig)

# def get_humna_body(image_path,json_path):
#     json_full_path =os.path.join(json_path, image_path.split('/')[-1].split('.')[0] + ".json")
#     part_json = get_part_json(json_full_path)
#     human_body_coords =get_coords(part_json['human_body'])
#     img = Image.open(image_path)
#     return img.crop(human_body_coords[0]).resize((300,400))
    
    
# def get_part_json(self, json_file_path):
#     '''
#     Get part dictionary from json path
#     '''
#     part_json = {}
#     part_name = ['door','glass','wheel','light','sideglass']
#     # door_active, glass_active, wheel_active, light_active, sideglass_active = binary_combination
    
#     with open(json_file_path, 'r') as f:
#         boxes = json.load(f)['shapes']
#         for part in part_name:
#             part_json[part]=[]
#         for box in boxes:
#             part_json[box["label"]].append(box["points"])

#     for key in part_json:#! 빈 애들은 None으로 처리해서 없다고 판단.
#         if part_json[key] == []:
#             part_json[key] = None
#     return part_json
# def get_coords(part):
#     extracted_coordinates = []
#     if part is None:
#         return None
#     elif len(part) == 1:
#         # print(part[0][0])
#         xmin, ymin = list(map(int,part[0][0]))
#         xmax, ymax = list(map(int,part[0][1]))
#         return [[xmin,ymin,xmax,ymax]]#아래 2일경우와 통일하기 위해 이중 리스트로 
#     elif len(part) == 2:
#         #! Eye, Ear, hand, foot -> These have 2 part, return list
#         for a in part: 
#             # print(a)
#             xmin, ymin = list(map(int,a[0]))
#             xmax, ymax = list(map(int,a[1]))
#             extracted_coordinates.append([xmin,ymin,xmax,ymax])
#         return extracted_coordinates
#     else:
#         for a in part: 
#             # print(a)
#             xmin, ymin = list(map(int,a[0]))
#             xmax, ymax = list(map(int,a[1]))
#             extracted_coordinates.append([xmin,ymin,xmax,ymax])
#         return extracted_coordinates

In [None]:
from shapley.get_shapley_value import *
# Pairs and weights used to turn the 32 per-combination logits into Shapley values.
# NOTE(review): semantics of get_ordered_pair / get_shapley_matrix come from
# shapley.get_shapley_value, which is not visible here.
all_ordered_pair,weights = get_ordered_pair(i=5)

# Per-class results, also reused for the task-wide summary plot.
part_count_list = []
nb_data_list = []
num_correct_list = []
part_number = all_ordered_pair.shape[0]
shapley_img_lists=dict()
best_part_index = dict()
class_names = ['Cab','Convertible','Coupe','Hatchback','Minivan','Other','SUV','Sedan','Van','Wagon']
data_path = '/local_datasets/stanford_car/01/test/'
json_path = '/data/jong980812/project/part_shapley/dataset/car/annotations'
task = 'CAR'
save_path = 'dataset/car'
# Defined before the loop because it is also needed by the summary plots after it.
part= ['door','glass','wheel','light','sideglass']

# Move the model once instead of on every batch.
model.to(device)
model.eval()
for index,class_name in enumerate(class_names):
    print(f'\n#####################Target_class:{class_name} getting Shapley value#####################')
    dataset = Shapley_part_car(data_folder=data_path,
                               anno_path=f'/data/jong980812/project/part_shapley/dataset/car/csv_files/car_type_lab_{index}.csv',
                               json_folder=json_path,
                               transform=transform_car)
    data_loader=DataLoader(dataset,30,shuffle=False,num_workers=8)
    print(dataset)
    num_correct = 0
    # How many times each part had the largest Shapley value.
    part_count = {i: 0 for i in range(part_number)}
    image_and_shapley=dict()
    for new_imgs, original_image, label,img_paths in tqdm(data_loader):
        batch_size = new_imgs.shape[0]
        # Fold the 32 masked variants of every image into the batch dimension.
        input_data = rearrange(new_imgs,  'b t c h w -> (b t) c h w').to(device)
        original_image = original_image.to(device)
        label = label.to(device)

        with torch.no_grad():
            prediction = model(original_image).argmax(dim=-1)
            output = model(input_data)

        # (batch, 32 combinations, 10 class logits)
        output = rearrange(output, '(b t) o -> b t o', b=batch_size)
        for i in range(batch_size):
            # Shapley values are only computed for correctly classified images.
            if prediction[i] != label[i]:
                continue
            num_correct +=1
            img_name = img_paths[i]
            # Logit of the true class for each of the 32 combinations of this image.
            correct_output = output[i, :, label[i]]
            shapley_matrix = get_shapley_matrix(all_ordered_pair,correct_output)
            shapley_contributions = shapley_matrix[:,:,1] - shapley_matrix[:,:,0]
            shapley_value = (shapley_contributions * 1/weights).sum(dim=1)
            image_and_shapley[img_name]=shapley_value.detach().tolist()
            max_part_number = (int(shapley_value.argmax()))
            part_count[max_part_number] += 1
    shapley_img_lists[class_name]=image_and_shapley
    best_part_index[class_name] = max(part_count, key=part_count.get)
    # Guard against an empty per-class CSV.
    acc = num_correct/len(dataset) if len(dataset) else 0.0
    print(f'Shapley result\n:{part_count}')
    print(f'Inference\n:{num_correct}/{len(dataset)} = {acc}')
    num_correct_list.append(num_correct)
    part_count_list.append(part_count) # For Total shapley
    nb_data_list.append(len(dataset)) # For Total shapley
    shapley_class(
                part = part,
                part_count= part_count,
                task= task,
                class_name = class_name,
                num_correct=num_correct,
                nb_data=len(dataset),
                save_path=save_path)
# Summary over all classes (class_name is unused by shapley_task).
shapley_task(
            part = part,
            part_count_list= part_count_list,
            task= task,
            class_name = class_name,
            num_correct=sum(num_correct_list),
            nb_data=sum(nb_data_list),
            save_path=save_path)
representative_each_class(shapley_lists=shapley_img_lists,
                            best_part_list=best_part_index,
                            task=task,
                            class_names=class_names,
                            n_show=2,
                            save_path=save_path,
                            json_path=json_path)

In [None]:
# Redraw the per-class comparison grids from the results already held in memory,
# without re-running the evaluation. Rows other than the first are re-sampled at random.
representative_each_class(
    shapley_lists=shapley_img_lists,
    best_part_list=best_part_index,
    task=task,
    class_names=class_names,
    json_path=json_path,
    n_show=2,
    save_path=save_path,
)

In [None]:
import pandas as pd

# Load the car-code / car-type table and shift the codes down by one.
df = pd.read_csv("/data/jong980812/project/part_shapley/dataset/car/csv_files/stanford_cars_type.csv")
df['car_code'] = df['car_code'] - 1

# One row per car type, holding the list of car codes that belong to it.
car_code_by_type = (
    df.groupby("car_type")["car_code"]
    .apply(list)
    .reset_index()
)

# Keep the row position as an explicit column, then show the type names.
car_code_by_type['index'] = car_code_by_type.index
print(car_code_by_type['car_type'])



In [None]:
import pandas as pd

# Read the test-split annotations (the variable name says "train", the file is cars_test.csv).
df_car_train = pd.read_csv('./dataset/car/csv_files/cars_test.csv')

# Build a car code -> car-type index lookup once, instead of scanning every
# type list for every row. If a code appears under several types, the last one wins.
code_to_type_index = {}
for index, car_code in enumerate(car_code_by_type['car_code']):
    for code in car_code:
        code_to_type_index[code] = index

# Rows whose label matches no type keep an empty string.
df_car_train['car_type_lab'] = [code_to_type_index.get(lab, '') for lab in df_car_train['lab']]


In [None]:
# Save the annotated DataFrame next to the other CSV files: the cell that splits it
# per car type reads it back from ./dataset/car/csv_files/.
df_car_train.to_csv('./dataset/car/csv_files/cars_test_with_type.csv', index=False)


In [None]:
# Distinct car codes listed for the car type in row 2.
set(car_code_by_type.loc[2, 'car_code'])

In [None]:
import pandas as pd

# Load the car-code / car-type table.
# NOTE(review): an earlier cell reads this file from .../car/csv_files/ - confirm which path is right.
df_car_type = pd.read_csv('/data/jong980812/project/part_shapley/dataset/car/stanford_cars_type.csv')

# List the car codes of each type after dropping repeated (car_type, car_code) rows.
unique_car_codes_by_type = (
    df_car_type
    .drop_duplicates(subset=['car_type', 'car_code'])
    .groupby('car_type')['car_code']
    .apply(list)
    .reset_index()
)

# Print the codes of the car type in row 2.
print(unique_car_codes_by_type.loc[2, 'car_code'])


In [None]:
import pandas as pd

# Load the car-code / car-type table.
df_car_type = pd.read_csv('/data/jong980812/project/part_shapley/dataset/car/stanford_cars_type.csv')

# Number of car_code entries per car type.
car_code_count_by_type = (
    df_car_type
    .groupby('car_type')['car_code']
    .count()
    .reset_index()
)

# Print the resulting table.
print(car_code_count_by_type)


In [None]:
# Total number of car_code entries over all car types.
int(car_code_count_by_type['car_code'].sum())

In [None]:
import pandas as pd

# Read the test annotations that already carry the car-type label.
df_car_train = pd.read_csv('./dataset/car/csv_files/cars_test_with_type.csv')

# Group the rows by car type; rows without a type (NaN) are dropped by groupby.
grouped = df_car_train.groupby('car_type_lab')

# Write one CSV per car type.
for group_name, group_data in grouped:
    # If any row had no type, the column is read back as float. Cast back to int
    # so the file is named car_type_lab_3.csv (not car_type_lab_3.0.csv) and the
    # labels stay integers.
    type_index = int(group_name)
    output_file_name = f'./dataset/car/csv_files/car_type_lab_{type_index}.csv'
    group_data.astype({'car_type_lab': int}).to_csv(output_file_name, index=False)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Category labels along the x-axis (arbitrary demo data).
x_values = ['Wheel', 'Light', 'Door', 'Glass', 'Sideglass']

# Made-up contribution values for the two stacked series ('Man' and 'Woman').
james_values = [0.2, 0.1, 0.3, 0.4, 0.2]
dean_values = [0.4, 0.3, 0.1, 0.2, 0.4]

# Stacked bars: the second series is drawn on top of the first.
for heights, series_label, colour, base in (
    (james_values, 'Man', 'b', None),
    (dean_values, 'Woman', 'g', james_values),
):
    plt.bar(x_values, heights, label=series_label, color=colour, bottom=base, alpha=0.7)

# Add the legend.
plt.legend()

# Set the y-axis range (0 to 1).
plt.ylim(0, 1)

# Show the figure.
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Category labels along the x-axis (arbitrary demo data).
x_values = ['Wheel', 'Light', 'Door', 'Glass', 'Sideglass']

# Made-up contributions of ten contributors, one row each.
contributions = np.array([
    [0.2, 0.1, 0.3, 0.4, 0.2],
    [0.4, 0.3, 0.1, 0.2, 0.4],
    [0.3, 0.6, 0.4, 0.3, 0.1],
    [0.1, 0.2, 0.3, 0.2, 0.4],
    [0.5, 0.2, 0.4, 0.3, 0.2],
    [0.2, 0.1, 0.3, 0.4, 0.2],
    [0.4, 0.3, 0.1, 0.2, 0.4],
    [0.3, 0.6, 0.4, 0.3, 0.1],
    [0.1, 0.2, 0.3, 0.2, 0.4],
    [0.5, 0.2, 0.4, 0.3, 0.2],
])

# Stacked bars: each contributor starts where the previous one ended.
bottom = np.zeros(len(x_values))
for i, row in enumerate(contributions):
    plt.bar(x_values, row, alpha=0.7, label=f'Contributor {i + 1}', bottom=bottom)
    bottom = bottom + row

# Add the legend.
plt.legend()

# Set the y-axis range (0 to 5).
plt.ylim(0, 5)

# Show the figure.
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Category labels along the x-axis (arbitrary demo data).
x_values = ['A', 'B', 'C', 'D', 'E']

# Made-up contributions of ten contributors, one row each.
contributions = np.array([
    [0.2, 0.1, 0.3, 0.4, 0.2],
    [0.4, 0.3, 0.1, 0.2, 0.4],
    [0.3, 0.6, 0.4, 0.3, 0.1],
    [0.1, 0.2, 0.3, 0.2, 0.4],
    [0.5, 0.2, 0.4, 0.3, 0.2],
    [0.2, 0.1, 0.3, 0.4, 0.2],
    [0.4, 0.3, 0.1, 0.2, 0.4],
    [0.3, 0.6, 0.4, 0.3, 0.1],
    [0.1, 0.2, 0.3, 0.2, 0.4],
    [0.5, 0.2, 0.4, 0.3, 0.2],
])

# Set the figure size.
plt.figure(figsize=(10, 6))

# Stacked bars: each contributor starts where the previous one ended.
bottom = np.zeros(len(x_values))
for i, row in enumerate(contributions):
    plt.bar(x_values, row, alpha=0.7, label=f'Contributor {i + 1}', bottom=bottom)
    bottom = bottom + row

# Add the legend in the upper-right corner.
plt.legend(loc='upper right')

# Set the y-axis range (0 to 3).
plt.ylim(0, 3)

# Show the figure.
plt.show()
