영상 개별 테스트(영상 티렉토리) 밑으로 추가

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import cv2
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img
import glob
import pandas as pd 
from natsort import natsorted

#csv 파일 경로
csv_file_path = '/content/drive/MyDrive/ttproject/logtest/dataset_tiktok.csv'
#csv 파일 읽기
csv_data=pd.read_csv(csv_file_path)

## 동영상들을 입력값으로 사용해 이미지 프레임화후, 각 폴더로 저장함

In [2]:
dirpath = '/content/drive/MyDrive/ttproject/logtest/test_video'

for filename in os.listdir(dirpath):
    if filename.endswith(".mp4"):
        filepath = os.path.join(dirpath, filename)

        video = cv2.VideoCapture(filepath)

        if not video.isOpened():
            print("Could not open:", filepath)
            continue

        # 비디오 파일 정보 출력
        length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)

        print("Processing video: ", filename)
        print("length:", length)
        print("width:", width)
        print("height:", height)
        print("fps:", fps)

        # 디렉토리 생성
        directory = os.path.splitext(filepath)[0]  # 동영상 파일명에서 확장자를 제외한 부분을 디렉토리명으로 사용
        try:
            if not os.path.exists(directory):
                os.makedirs(directory)
        except OSError:
            print('Error: Creating directory -', directory)
            continue

        count = 0
        while video.isOpened():
            ret, image = video.read()
            if not ret:
                break
            if int(video.get(1)) % int(fps) == 0:  # 1초마다 이미지 추출
                cv2.imwrite(directory + "/frame%d.jpg" % count, image)
                print('Saved frame number:', int(video.get(1)))
                count += 1

        video.release()


Processing video:  thisbabyslay_.mp4
length: 334
width: 406
height: 720
fps: 24.0
Saved frame number: 24
Saved frame number: 48
Saved frame number: 72
Saved frame number: 96
Saved frame number: 120
Saved frame number: 144
Saved frame number: 168
Saved frame number: 192
Saved frame number: 216
Saved frame number: 240
Saved frame number: 264
Saved frame number: 288
Saved frame number: 312
Processing video:  ddllolol.mp4
length: 761
width: 406
height: 720
fps: 24.0
Saved frame number: 24
Saved frame number: 48
Saved frame number: 72
Saved frame number: 96
Saved frame number: 120
Saved frame number: 144
Saved frame number: 168
Saved frame number: 192
Saved frame number: 216
Saved frame number: 240
Saved frame number: 264
Saved frame number: 288
Saved frame number: 312
Saved frame number: 336
Saved frame number: 360
Saved frame number: 384
Saved frame number: 408
Saved frame number: 432
Saved frame number: 456
Saved frame number: 480
Saved frame number: 504
Saved frame number: 528
Saved fra

In [5]:
image_size = (180, 180)
channels = 3
#본 모델에서 180, 180 3채널로 고정시켰기 때문에 고정변수입니다 이미지사이즈

In [6]:
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# 모델 불러오기, 분류 class 설정

In [7]:
exposure_model = load_model('/content/drive/MyDrive/newVGG16(노출비노출).h5')
violence_model = load_model('/content/drive/MyDrive/newVGG16(폭력비폭력).h5')
relationship_model = load_model('/content/drive/MyDrive/newVGG16(관계비관계).h5')

In [8]:
naked_classes = ['Naked', 'Non-Naked'] 
violence_classes = ['Non-Violence', 'Violence'] 
sexual_classes = ['Sexual', 'Non-Sexual']     

In [9]:
def preprocess_image(image):
  image = cv2.resize(image, image_size)
  image = image.astype('float32') / 255.0
  return image

In [10]:
def classify_image(image):
    preprocessed_image = preprocess_image(image)
    naked_prediction = exposure_model.predict(np.expand_dims(preprocessed_image, axis=0))[0]
    violence_prediction = violence_model.predict(np.expand_dims(preprocessed_image, axis=0))[0]
    sexual_prediction = relationship_model.predict(np.expand_dims(preprocessed_image, axis=0))[0]

    # 의도적으로 blind 폴더에 이동하는 예시를 확인하기 위해 확률을 낮게 조정
    naked_class = naked_classes[np.argmax(naked_prediction)] if np.max(naked_prediction) > 0.1 else 'Uncertain'
    violence_class = violence_classes[np.argmax(violence_prediction)] if np.max(violence_prediction) > 0.1 else 'Uncertain'
    sexual_class = sexual_classes[np.argmax(sexual_prediction)] if np.max(sexual_prediction) > 0.1 else 'Uncertain'

    return naked_class, violence_class, sexual_class

## 폴더 전체 분류 테스트

In [11]:
# 틱톡 전체 영상 root 디렉토리
root_path = '/content/drive/MyDrive/ttproject/logtest/test_video'
folder_df = pd.DataFrame(columns=["폴더명", '유해/비유해 예측', "노출 예측", "폭력 예측", "관계 예측"])

for root, dirs, files in os.walk(root_path):
    for folder in dirs:
        folder_path = os.path.join(root, folder)
        folder_name = os.path.basename(folder_path)
        filenames = natsorted(os.listdir(folder_path))

        frame_df = pd.DataFrame({"filename": filenames})
        frame_df["path"] = frame_df["filename"].apply(lambda x: os.path.join(folder_path, x))
        frame_df[["노출", "폭력", "관계"]] = frame_df["path"].apply(lambda x: pd.Series(classify_image(cv2.imread(x))))
        frame_df = frame_df.drop(columns=["path"])

        is_harmful = (frame_df['노출'] == 'Naked').sum() > len(frame_df)*0.1 or (frame_df['폭력'] == 'Violence').sum() > len(frame_df)*0.1 or (frame_df['관계'] == 'Sexual').sum() > len(frame_df)*0.1
        folder_df = folder_df.append({
            "폴더명": folder_name,
            "유해/비유해 예측": "harmful" if is_harmful else "unharmful",
            "노출 예측": "Naked" if (frame_df['노출'] == 'Naked').sum() > len(frame_df)*0.5 else "Non-Naked",
            "폭력 예측": "Violence" if (frame_df['폭력'] == 'Violence').sum() > len(frame_df)*0.5 else "Non-Violence",
            "관계 예측": "Sexual" if (frame_df['관계'] == 'Sexual').sum() > len(frame_df)*0.5 else "Non-Sexual"
        }, ignore_index=True)


# csv파일의 authorMeta/name과 mp4와 매핑하여 사용자 이름 출력
folder_df["사용자 이름"] = ""
folder_df.set_index("폴더명", inplace=True)  # 인덱스 설정
folder_df.loc[folder_df.index.isin(csv_data["authorMeta/name"]), "사용자 이름"] = folder_df.index

folder_df.reset_index(inplace=True)  # 인덱스 재설정

folder_df = folder_df.sort_values("폴더명", ascending=True).reset_index(drop=True)
folder_df



  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({




  folder_df = folder_df.append({


Unnamed: 0,폴더명,유해/비유해 예측,노출 예측,폭력 예측,관계 예측,사용자 이름
0,angcherry,unharmful,Non-Naked,Non-Violence,Non-Sexual,angcherry
1,ddllolol,unharmful,Non-Naked,Non-Violence,Non-Sexual,ddllolol
2,hea_dli_,unharmful,Non-Naked,Non-Violence,Non-Sexual,hea_dli_
3,siiiirodeath_,unharmful,Non-Naked,Non-Violence,Non-Sexual,siiiirodeath_
4,thisbabyslay_,unharmful,Non-Naked,Non-Violence,Non-Sexual,thisbabyslay_


### 각 동영상이 유해하다고 예측 될 경우, blind 폴더로 이동 해, 그 경로를 저장함

In [12]:
import csv
import shutil

# 입력 경로 : 전체 영상들이 들어있는 상위 폴더
folder_path = '/content/drive/MyDrive/ttproject/logtest/test_video'

# blind 폴더 경로
output_folder = '/content/drive/MyDrive/ttproject/logtest/blind'
blind_df = pd.DataFrame(columns=['폴더명', 'blind 경로'])

for file_name in os.listdir(folder_path):
    if any((folder_df['폴더명'] == file_name) & (folder_df['유해/비유해 예측'] == 'harmful')):
        jpg_file_path = os.path.join(folder_path, file_name)
        shutil.move(jpg_file_path, os.path.join(output_folder, file_name))
        # + csv 파일 연동 - 영상-정보 mapping - log 같이 넘어가도록

        # blind처리 된 폴더 경로 excel에 연결
        path = output_folder + '/'+file_name
        folder_df.loc[folder_df['폴더명'] == file_name, 'blind 경로'] = os.path.join(output_folder, file_name)
    else:
        pass

folder_df

Unnamed: 0,폴더명,유해/비유해 예측,노출 예측,폭력 예측,관계 예측,사용자 이름
0,angcherry,unharmful,Non-Naked,Non-Violence,Non-Sexual,angcherry
1,ddllolol,unharmful,Non-Naked,Non-Violence,Non-Sexual,ddllolol
2,hea_dli_,unharmful,Non-Naked,Non-Violence,Non-Sexual,hea_dli_
3,siiiirodeath_,unharmful,Non-Naked,Non-Violence,Non-Sexual,siiiirodeath_
4,thisbabyslay_,unharmful,Non-Naked,Non-Violence,Non-Sexual,thisbabyslay_


### blind 경로 + 사용자 이름 + 예측값 포함된 csv 파일 저장
판단 위해 실제 유해/비유해 여부가 작성되어 있는 csv 파일에 예측 값을 추가함

In [14]:
output_df = pd.read_excel('/content/drive/MyDrive/ttproject/logtest/logtest.xlsx')
df_A = pd.DataFrame(output_df, columns=output_df.keys())
df_B = pd.DataFrame(folder_df, columns=folder_df.keys())

# 위에 결과 나오면 폴더명을 키값으로 아래 output_df에 붙여넣기 하면 끄읏
result = pd.merge(df_A, df_B, on="폴더명", how="left")

In [15]:
result.to_excel('/content/drive/MyDrive/logresult.xlsx', index=False)