Skip to content
Browse files

ML class in progress

  • Loading branch information...
dermatologist committed Mar 13, 2019
1 parent 3badac4 commit a7e7d9459e9a1840f0c215a37c6fdb1401eaa1e4
Showing with 57 additions and 6 deletions.
  1. +57 −6 src/ml_qrmine/
@@ -1,29 +1,80 @@
# This is for oversampling
from pandas import read_csv

import numpy
from imblearn.over_sampling import RandomOverSampler
from keras.layers import Dense
from keras.models import Sequential

class MLQRMine(object):
    """Hold a CSV-backed dataset and prepare it for model training.

    The class reads a CSV file into a pandas DataFrame, can mark empty
    strings as missing values, splits the frame into predictors (``X``) and
    a target (``y``, the last column) and can randomly oversample the
    minority class.  The marking and oversampling steps keep a snapshot so
    they can be undone.
    """

    # Sentinel distinguishing "no argument given" (read) from an explicit
    # value (write) in the combined accessors below; ``None`` stays a valid
    # value to store.
    _UNSET = object()

    def __init__(self):
        self._seed = 7
        self._csvfile = ""
        self._dataset = None
        self._X = None
        self._y = None
        self._X_original = None
        self._y_original = None
        self._dataset_original = None

    # Accessors
    def seed(self, seed=_UNSET):
        """Get or set the stored random seed.

        Called without an argument, return the current seed.  Called with
        one, store it (and return ``None``), as the original setter did.
        """
        if seed is self._UNSET:
            return self._seed
        self._seed = seed

    def csvfile(self, csvfile=_UNSET):
        """Get or set the CSV source handed to ``pandas.read_csv``.

        Called without an argument, return the current value.  Called with
        one, store it (and return ``None``), as the original setter did.
        """
        if csvfile is self._UNSET:
            return self._csvfile
        self._csvfile = csvfile

    def dataset(self):
        """Return the current DataFrame, or ``None`` if nothing was read."""
        return self._dataset

    def X(self):
        """Return the predictor values set by ``read_xy`` (or ``None``)."""
        return self._X

    def y(self):
        """Return the target values set by ``read_xy`` (or ``None``)."""
        return self._y

    # Functions
    def read_csv(self):
        """Load the configured CSV file into the dataset.

        The call below resolves to the module-level ``pandas.read_csv``;
        method names are not visible as bare names inside method bodies.
        """
        # NOTE(review): header=1 makes pandas take the column names from the
        # second line of the file and ignore the first -- confirm the files
        # really carry a leading line to skip (header=0 is the first line).
        self._dataset = read_csv(self._csvfile, header=1)

    def mark_missing(self):
        """Replace empty strings in the dataset with NaN.

        The previous frame is kept so ``restore_mark_missing`` can undo the
        change; ``DataFrame.replace`` returns a new frame, so the snapshot
        is not modified.
        """
        self._dataset_original = self._dataset
        # numpy.nan (lower case) exists in every NumPy release; the
        # numpy.NaN alias was removed in NumPy 2.0.
        self._dataset = self._dataset.replace('', numpy.nan)

    def restore_mark_missing(self):
        """Undo ``mark_missing``; do nothing if it was never called."""
        if self._dataset_original is not None:
            self._dataset = self._dataset_original

    def get_shape(self):
        """Return the ``(rows, columns)`` shape of the dataset."""
        return self._dataset.shape

    def read_xy(self):
        """Split the dataset into predictors ``X`` and target ``y``.

        The last column of the frame is taken as the dependent variable and
        every column before it as an independent variable.
        """
        column_count = self._dataset.shape[1]
        # Index of the last column, which holds the dependent variable.
        target_index = column_count - 1
        values = self._dataset.values
        self._X = values[:, 0:target_index]
        self._y = values[:, target_index]

    def oversample(self):
        """Randomly oversample ``X``/``y`` in place, keeping a snapshot.

        Raises:
            ValueError: if ``read_xy`` has not populated ``X`` and ``y``.
        """
        if self._X is None or self._y is None:
            raise ValueError("read_xy() must be called before oversample()")
        self._X_original = self._X
        self._y_original = self._y
        sampler = RandomOverSampler(random_state=0)
        # Newer imbalanced-learn releases expose fit_resample; fall back to
        # the older fit_sample name when that is all the installed one has.
        resample = getattr(sampler, "fit_resample", None)
        if resample is None:
            resample = sampler.fit_sample
        self._X, self._y = resample(self._X, self._y)

    def restore_oversample(self):
        """Undo ``oversample``; do nothing if it was never called."""
        if self._X_original is not None:
            self._X = self._X_original
            self._y = self._y_original

0 comments on commit a7e7d94

Please sign in to comment.
You can’t perform that action at this time.