Skip to content
This repository has been archived by the owner on Jun 10, 2020. It is now read-only.

Commit

Permalink
pep8 is bad
Browse files Browse the repository at this point in the history
  • Loading branch information
ShahNafisRafique committed Dec 2, 2019
1 parent 427e61f commit f316a1a
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 73 deletions.
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"python.pythonPath": "C:\\Users\\skool\\.virtualenvs\\Poststorm_Imagery-AfucPPEW\\Scripts\\python.exe"
}
"python.pythonPath": "C:\\Users\\namenai\\.virtualenvs\\Poststorm_Imagery-ZWyQiJON\\Scripts\\python.exe"
}
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,5 @@ imageio = "*"
matplotlib = "*"
scipy = "*"
opencv-python = "*"
glob = "*"
scikit-learn = "*"
tensorflow = "*"
86 changes: 16 additions & 70 deletions src/python/psic/stats/tagging_stats/cnn_impact.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
# Imports.

import pandas as pd
import jsonpickle
import numpy as np
import matplotlib.pyplot as plt
import os, os.path
from os import path
import dateutil.parser
from datetime import datetime
import statistics
import glob
import random
# import numpy as np
# import matplotlib.pyplot as plt
import os
# from os import path
from sklearn.model_selection import train_test_split
# from tensorflow import keras
# from keras.preprocessing.image import ImageDataGenerator

# Just a little comment to remember what each number for impact means
# NoneId:0,
Expand All @@ -22,68 +19,17 @@
# Global Constants.

SELF_PATH = os.getcwd()
PATH_TO_FILE_STREAM = 'G:\Shared drives\P-Sick'
PATH_TO_TRAINING_IMAGES = os.path.join(PATH_TO_FILE_STREAM,'data\Florence/20180917a_jpgs')
PATH_TO_TESTING_IMAGES = os.path.join(PATH_TO_FILE_STREAM,'data\Florence/20180918a_jpgs')
TEST_IMAGE_SIZE = 30
PATH_TO_FILE_STREAM = 'G:/Shared drives/P-Sick'
PATH_TO_IMAGES = os.path.join(PATH_TO_FILE_STREAM, 'small/Florence/20180917a_jpgs')
PATH_TO_TAG_CSV = os.path.join(SELF_PATH, '../tagging_data.csv')

PATH_TO_TAG_CSV = os.path.join(SELF_PATH,'../tagging_data.csv')
TEST_TO_TRAIN_RATIO = 0.3

# First lets load the csv that has all the completely tagged image tags.
df_image_tags = pd.read_csv(PATH_TO_TAG_CSV)
# create training and test set
tagged_image_list = df_image_tags['image_id'].tolist()
training_images, testing_images = train_test_split(tagged_image_list, test_size=TEST_TO_TRAIN_RATIO, random_state=42)

# Get a series of the images
series_images = df_image_tags['image_id']

# Check if all the images in the series exist
# all_exist = True
# count = 0
# not_exist = 0
# for image in series_images:
# count += 1
# all_exist = all_exist and path.exists(os.path.join(PATH_TO_TRAINING_IMAGES,image))
# if not path.exists(os.path.join(PATH_TO_TRAINING_IMAGES,image)):
# not_exist += 1

# if all_exist:
# print(f'All {count} images exist')
# else:
# print(f'Of {count} images, {not_exist} dont exist')

#glob.glob("G:\\Shared drives\\P-Sick\\data\\Florence/20180917a_jpgs/jpgs/*.jpg")

LIST_OF_POSSIBLE_TESTING_IMAGES = glob.glob(f"{PATH_TO_TESTING_IMAGES}/jpgs/*.jpg")
# print(len(LIST_OF_POSSIBLE_TESTING_IMAGES))
# print(type(LIST_OF_POSSIBLE_TESTING_IMAGES))

LIST_OF_TRAINING_IMAGES = []

selected_training_image_count = 0
while (selected_training_image_count < TEST_IMAGE_SIZE):
secure_random = random.SystemRandom()
selected_image = secure_random.choice(LIST_OF_POSSIBLE_TESTING_IMAGES)

# # Make sure we don't select a duplicated file that has (1) in its name
if selected_image.find("(1)") == -1:
split_image_name = selected_image.split("\\")[-1]
    # Make sure we haven't added this image already
if split_image_name not in LIST_OF_TRAINING_IMAGES:
# Use split just to get the file name
LIST_OF_TRAINING_IMAGES.append(split_image_name)
selected_training_image_count+=1

# Just in case, drop any duplicates
LIST_OF_TRAINING_IMAGES = list( dict.fromkeys(LIST_OF_TRAINING_IMAGES) )

print('--- Training Set ---')
print(series_images.head())
print('\n')

print('--- Testing Set ---')
print(LIST_OF_TRAINING_IMAGES)
# finding how many duplicates
# duplicated_count = 0
# for image in LIST_OF_POSSIBLE_TESTING_IMAGES:
# if image.find("(1)") != -1:
# duplicated_count +=1
# print(duplicated_count)
# do cnn shit
# profit?

0 comments on commit f316a1a

Please sign in to comment.