Skip to content
This repository has been archived by the owner on Jun 10, 2020. It is now read-only.

Commit

Permalink
pep8 is bad
Browse files Browse the repository at this point in the history
  • Loading branch information
ShahNafisRafique committed Dec 2, 2019
1 parent 427e61f commit f316a1a
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 73 deletions.
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"python.pythonPath": "C:\\Users\\skool\\.virtualenvs\\Poststorm_Imagery-AfucPPEW\\Scripts\\python.exe"
}
"python.pythonPath": "C:\\Users\\namenai\\.virtualenvs\\Poststorm_Imagery-ZWyQiJON\\Scripts\\python.exe"
}
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ imageio = "*"
matplotlib = "*"
scipy = "*"
opencv-python = "*"
glob = "*"
scikit-learn = "*"
tensorflow = "*"
86 changes: 16 additions & 70 deletions src/python/psic/stats/tagging_stats/cnn_impact.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
# Imports.

import pandas as pd
import jsonpickle
import numpy as np
import matplotlib.pyplot as plt
import os, os.path
from os import path
import dateutil.parser
from datetime import datetime
import statistics
import glob
import random
# import numpy as np
# import matplotlib.pyplot as plt
import os
# from os import path
from sklearn.model_selection import train_test_split
# from tensorflow import keras
# from keras.preprocessing.image import ImageDataGenerator

# Just a little comment to remember what each number for impact means
# NoneId:0,
Expand All @@ -22,68 +19,17 @@
# Global Constants.

SELF_PATH = os.getcwd()
PATH_TO_FILE_STREAM = 'G:\Shared drives\P-Sick'
PATH_TO_TRAINING_IMAGES = os.path.join(PATH_TO_FILE_STREAM,'data\Florence/20180917a_jpgs')
PATH_TO_TESTING_IMAGES = os.path.join(PATH_TO_FILE_STREAM,'data\Florence/20180918a_jpgs')
TEST_IMAGE_SIZE = 30
PATH_TO_FILE_STREAM = 'G:/Shared drives/P-Sick'
PATH_TO_IMAGES = os.path.join(PATH_TO_FILE_STREAM, 'small/Florence/20180917a_jpgs')
PATH_TO_TAG_CSV = os.path.join(SELF_PATH, '../tagging_data.csv')

PATH_TO_TAG_CSV = os.path.join(SELF_PATH,'../tagging_data.csv')
TEST_TO_TRAIN_RATIO = 0.3

# First lets load the csv that has all the completely tagged image tags.
df_image_tags = pd.read_csv(PATH_TO_TAG_CSV)
# create training and test set
tagged_image_list = df_image_tags['image_id'].tolist()
training_images, testing_images = train_test_split(tagged_image_list, test_size=TEST_TO_TRAIN_RATIO, random_state=42)

# Get a series of the images
series_images = df_image_tags['image_id']

# Check if all the images in the series exist
# all_exist = True
# count = 0
# not_exist = 0
# for image in series_images:
# count += 1
# all_exist = all_exist and path.exists(os.path.join(PATH_TO_TRAINING_IMAGES,image))
# if not path.exists(os.path.join(PATH_TO_TRAINING_IMAGES,image)):
# not_exist += 1

# if all_exist:
# print(f'All {count} images exist')
# else:
# print(f'Of {count} images, {not_exist} dont exist')

#glob.glob("G:\\Shared drives\\P-Sick\\data\\Florence/20180917a_jpgs/jpgs/*.jpg")

LIST_OF_POSSIBLE_TESTING_IMAGES = glob.glob(f"{PATH_TO_TESTING_IMAGES}/jpgs/*.jpg")
# print(len(LIST_OF_POSSIBLE_TESTING_IMAGES))
# print(type(LIST_OF_POSSIBLE_TESTING_IMAGES))

LIST_OF_TRAINING_IMAGES = []

selected_training_image_count = 0
while (selected_training_image_count < TEST_IMAGE_SIZE):
secure_random = random.SystemRandom()
selected_image = secure_random.choice(LIST_OF_POSSIBLE_TESTING_IMAGES)

# # Make sure we don't select a duplicated file that has (1) in its name
if selected_image.find("(1)") == -1:
split_image_name = selected_image.split("\\")[-1]
    # Make sure we haven't added this image already
if split_image_name not in LIST_OF_TRAINING_IMAGES:
# Use split just to get the file name
LIST_OF_TRAINING_IMAGES.append(split_image_name)
selected_training_image_count+=1

# Just in case, drop any duplicates
LIST_OF_TRAINING_IMAGES = list( dict.fromkeys(LIST_OF_TRAINING_IMAGES) )

print('--- Training Set ---')
print(series_images.head())
print('\n')

print('--- Testing Set ---')
print(LIST_OF_TRAINING_IMAGES)
# finding how many duplicates
# duplicated_count = 0
# for image in LIST_OF_POSSIBLE_TESTING_IMAGES:
# if image.find("(1)") != -1:
# duplicated_count +=1
# print(duplicated_count)
# do cnn shit
# profit?

0 comments on commit f316a1a

Please sign in to comment.