영상 개별 테스트(영상 디렉토리) 밑으로 추가

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; every path
# used by the later cells (CSV, models, frame folders, Excel logs) lives
# under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img
import glob
import pandas as pd 
from natsort import natsorted

# Path to the CSV file
csv_file_path = '/content/drive/MyDrive/dataset_tiktok.csv'
# Read the CSV file into a DataFrame; a later cell looks up its
# "authorMeta/name" column to match folder names to user names.
csv_data=pd.read_csv(csv_file_path)

In [3]:
# Target (width, height) passed to cv2.resize in preprocess_image.
image_size = (180, 180)
# NOTE(review): `channels` is not referenced by any of the visible cells.
channels = 3
# Image size is a fixed setting: the models were built for 180x180 inputs with 3 channels.

In [4]:
from PIL import Image, ImageFile
# Tell PIL to load truncated (incomplete) image files instead of raising.
# NOTE(review): the frames in the visible cells are read with cv2.imread,
# so this flag only matters for PIL-based loading — confirm it is still needed.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [5]:
# Load the three saved Keras models from Drive. Going by the file names they
# are the exposure / non-exposure, violence / non-violence and
# relationship / non-relationship classifiers (presumably VGG16-based — TODO confirm).
exposure_model = load_model('/content/drive/MyDrive/newVGG16(노출비노출).h5')
violence_model = load_model('/content/drive/MyDrive/newVGG16(폭력비폭력).h5')
relationship_model = load_model('/content/drive/MyDrive/newVGG16(관계비관계).h5')

In [6]:
# Label names for each model; classify_image indexes these lists with the
# argmax of the corresponding model's prediction.
# NOTE(review): the order of each list must match the label order used when
# the model was trained (note that the "positive" label is index 0 for
# naked/sexual but index 1 for violence) — confirm against the training code.
naked_classes = ['Naked', 'Non-Naked']
violence_classes = ['Non-Violence', 'Violence']
sexual_classes = ['Sexual', 'Non-Sexual']

In [7]:
def preprocess_image(image):
    """Prepare a single frame for the classifiers.

    The frame is resized with ``cv2.resize`` to the module-level
    ``image_size`` and converted to float32 with every value divided by 255.

    Args:
        image: Image array accepted by ``cv2.resize`` (the visible caller
            passes the result of ``cv2.imread``).

    Returns:
        The resized float32 array, scaled by 1/255.
    """
    # NOTE(review): cv2.imread yields BGR channel order — confirm the models
    # were trained on BGR input as well.
    resized = cv2.resize(image, image_size)
    as_float = resized.astype('float32')
    return as_float / 255.0

In [8]:
def classify_image(image):
    """Classify one frame with the exposure, violence and relationship models.

    The frame is preprocessed once, wrapped into a batch of size one and fed
    to each of the three module-level models. For every model the label with
    the highest score is returned only when that score exceeds the model's
    confidence threshold; otherwise the result is 'Uncertain'.

    Args:
        image: Decoded image array (e.g. the result of ``cv2.imread``).

    Returns:
        Tuple ``(naked_class, violence_class, sexual_class)`` of strings, each
        either an entry of ``naked_classes`` / ``violence_classes`` /
        ``sexual_classes`` or ``'Uncertain'``.

    Raises:
        ValueError: If ``image`` is None, which is what ``cv2.imread`` returns
            for a file it cannot read or decode.
    """
    if image is None:
        # Fail with a clear message instead of an obscure OpenCV assertion
        # raised later from cv2.resize.
        raise ValueError("classify_image received None: the image could not be read")

    # Build the single-image batch once and reuse it for all three models.
    batch = np.expand_dims(preprocess_image(image), axis=0)

    def _label(model, classes, threshold):
        # [0] takes the scores of the only image in the batch; assumes the
        # model emits one score per entry of `classes` — TODO confirm.
        scores = model.predict(batch)[0]
        if np.max(scores) > threshold:
            return classes[np.argmax(scores)]
        return 'Uncertain'

    # Thresholds: 0.9 for exposure, 0.85 for violence, 0.8 for relationship.
    naked_class = _label(exposure_model, naked_classes, 0.9)
    violence_class = _label(violence_model, violence_classes, 0.85)
    sexual_class = _label(relationship_model, sexual_classes, 0.8)

    return naked_class, violence_class, sexual_class

In [9]:
# Root directory holding the frame folders of all TikTok videos
root_path = '/content/drive/MyDrive/video'

# A category is flagged for a folder when MORE than this share of its
# classified frames carries the harmful label of that category.
harmful_ratio = 0.15

result_columns = ["폴더명", '유해/비유해 예측', "노출 예측", "폭력 예측", "관계 예측"]
# Rows are collected in a list and turned into a DataFrame once;
# DataFrame.append is deprecated and no longer exists in pandas 2.x.
folder_rows = []

for root, dirs, files in os.walk(root_path):
    for folder in dirs:
        folder_path = os.path.join(root, folder)
        folder_name = os.path.basename(folder_path)

        # Classify every readable frame of this folder in natural sort order.
        frame_records = []
        for filename in natsorted(os.listdir(folder_path)):
            frame_path = os.path.join(folder_path, filename)
            if not os.path.isfile(frame_path):
                # Nested directories are visited by os.walk on their own.
                continue
            image = cv2.imread(frame_path)
            if image is None:
                # cv2.imread returns None for files it cannot decode.
                print(f"skipping unreadable frame: {frame_path}")
                continue
            frame_records.append((filename, *classify_image(image)))

        frame_df = pd.DataFrame(frame_records, columns=["filename", "노출", "폭력", "관계"])
        if frame_df.empty:
            # No classifiable frames (empty or purely structural folder).
            continue

        # Compute the per-category verdicts once and reuse them below.
        limit = len(frame_df) * harmful_ratio
        is_naked = (frame_df['노출'] == 'Naked').sum() > limit
        is_violent = (frame_df['폭력'] == 'Violence').sum() > limit
        is_sexual = (frame_df['관계'] == 'Sexual').sum() > limit
        is_harmful = is_naked or is_violent or is_sexual

        folder_rows.append({
            "폴더명": folder_name,
            "유해/비유해 예측": "harmful" if is_harmful else "unharmful",
            "노출 예측": "Naked" if is_naked else "Non-Naked",
            "폭력 예측": "Violence" if is_violent else "Non-Violence",
            "관계 예측": "Sexual" if is_sexual else "Non-Sexual",
        })

folder_df = pd.DataFrame(folder_rows, columns=result_columns)

# Match folder names against authorMeta/name in the CSV: the user-name
# column holds the folder name when it is a known author, else "".
# (Assigning the whole index to a masked subset, as before, fails as soon
# as one folder is missing from the CSV.)
folder_df["사용자 이름"] = folder_df["폴더명"].where(
    folder_df["폴더명"].isin(csv_data["authorMeta/name"]), ""
)

folder_df = folder_df.sort_values("폴더명", ascending=True).reset_index(drop=True)
folder_df



  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({


Unnamed: 0,폴더명,유해/비유해 예측,노출 예측,폭력 예측,관계 예측,사용자 이름
0,angcherry,harmful,Non-Naked,Violence,Sexual,angcherry
1,ddllolol,harmful,Naked,Non-Violence,Non-Sexual,ddllolol
2,hea_dli_,harmful,Non-Naked,Violence,Sexual,hea_dli_
3,siiiirodeath_,harmful,Non-Naked,Violence,Sexual,siiiirodeath_
4,thisbabyslay_,harmful,Non-Naked,Non-Violence,Sexual,thisbabyslay_


In [10]:
import csv
import shutil

# Input path: harmful video folders are moved as a whole, so point this at
# their parent directory. Like the `video` folder, it must contain only the
# per-video folders.
folder_path = '/content/drive/MyDrive/video'

# Destination directory for blinded (harmful) folders
output_folder = '/content/drive/MyDrive/blind'
# NOTE(review): blind_df is not used by the visible cells; kept in case a
# later cell relies on it.
blind_df = pd.DataFrame(columns=['폴더명', 'blind 경로'])

# Make sure the destination exists before anything is moved into it.
os.makedirs(output_folder, exist_ok=True)

# Create the path column up front so it exists even when nothing is harmful.
if 'blind 경로' not in folder_df.columns:
    folder_df['blind 경로'] = pd.Series(index=folder_df.index, dtype=object)

# Look the harmful folder names up once instead of scanning folder_df for
# every directory entry.
harmful_folders = set(
    folder_df.loc[folder_df['유해/비유해 예측'] == 'harmful', '폴더명']
)

for file_name in os.listdir(folder_path):
    if file_name not in harmful_folders:
        continue
    source_path = os.path.join(folder_path, file_name)
    blind_path = os.path.join(output_folder, file_name)
    # NOTE(review): if blind_path already exists as a directory, shutil.move
    # places the source inside it — confirm re-runs cannot hit this case.
    shutil.move(source_path, blind_path)
    # TODO: also carry the CSV video-info mapping / log along with the move.

    # Record where the blinded folder now lives (ends up in the Excel log).
    folder_df.loc[folder_df['폴더명'] == file_name, 'blind 경로'] = blind_path

folder_df

Unnamed: 0,폴더명,유해/비유해 예측,노출 예측,폭력 예측,관계 예측,사용자 이름,blind 경로
0,angcherry,harmful,Non-Naked,Violence,Sexual,angcherry,/content/drive/MyDrive/blind/angcherry
1,ddllolol,harmful,Naked,Non-Violence,Non-Sexual,ddllolol,/content/drive/MyDrive/blind/ddllolol
2,hea_dli_,harmful,Non-Naked,Violence,Sexual,hea_dli_,/content/drive/MyDrive/blind/hea_dli_
3,siiiirodeath_,harmful,Non-Naked,Violence,Sexual,siiiirodeath_,/content/drive/MyDrive/blind/siiiirodeath_
4,thisbabyslay_,harmful,Non-Naked,Non-Violence,Sexual,thisbabyslay_,/content/drive/MyDrive/blind/thisbabyslay_


In [11]:
# Load the existing log workbook from Drive.
output_df = pd.read_excel('/content/drive/MyDrive/logtest.xlsx')

# Re-wrap the log table and the prediction table as fresh DataFrames that
# keep all of their columns.
df_A, df_B = (
    pd.DataFrame(table, columns=table.keys())
    for table in (output_df, folder_df)
)

# Attach the predictions to the log rows, keyed on the folder name; log rows
# without a matching prediction are kept (left join).
result = df_A.merge(df_B, how="left", on="폴더명")

In [12]:
# Write the merged log to a new Excel workbook on Drive, without the
# DataFrame index column.
result.to_excel('/content/drive/MyDrive/logresult.xlsx', index=False)