# unittest 1

* Testing the outlierdrop.py file originaly in the statswranger module

In [61]:
import statswrangler.outlierdrop as od
import pandas as pd

import unittest
from pandas.util.testing import assert_frame_equal #this has to be imported to check dataframes

# importing data
* I imported dataframes that would be used to check the module

In [62]:
df = pd.read_csv("train.csv")

#data cleaning for test case checking later

df = df[0:100] #dataframe (will only use a subset dataset too big)

df = df.drop("PoolQC", axis = 1) #drop this columns since mostly NaN's

s1 = df["SalePrice"] #series 1 using column SalePrice
s2 = df["LotArea"] #series 2 using LotArea


#nooutliers1 and nooutliers2 will be used in our check cases

nooutliers1 = pd.read_csv("withnooutliers.csv", index_col = "Unnamed: 0")
#this dataset has already been cleaned so that outliers in SalePrice columnn have been removed
nooutliers1 = nooutliers1.drop("PoolQC", axis =1) #remove the PoolQC column since mostly NaN's


#nooutliers2 will refer to the testcase where threshold is 2
#this only removes 2 out of the 3 outliers in the dataset
#this dataset has the outliers in SalePrice columnn removed
nooutliers2 = pd.read_csv("withnooutliers2.csv",index_col = "Unnamed: 0")


In [63]:
class TestOutlierdrop(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print("setUpClass")
    
    def setUp(self):
        self.d1 = od.Data(s1,df)
        self.d2 = od.Data(s2,df)
        self.d3 = od.Dropped(s1,df)
        self.d4 = od.Dropped(s2,df)
        self.d5 = od.Dropped(s1,df, threshold = 2) #with threshold = 2
        
        self.a1, self.b1 = self.d1.Outliers() 
        
        
        print('Set Up')
        
    def tearDown(self):
        print('Tear Down')
        
    def test_Outliers(self): #test case 
       
        self.assertIs(type(self.d1.Outliers()), type((2,3))) #check that it returns a tuple
        
        #check that they are lists, since the function that inherits this class will can only take lists 
        self.assertIs(type(self.a1) and type(self.b1), type([])) 
         
        
        #check for equal
        self.assertEqual(self.d1.Outliers(), ([11, 53, 58], [345000, 385000, 438780])) 
        self.assertEqual(self.d2.Outliers(), ([41, 53, 66, 75], [16905, 50271, 19900, 1596]))
        
        #check outliers in the series if they match the index
        for i in self.a1:
            self.assertIn(s1[i], self.b1)
            

        
    def test_DropOutliers(self): #test case
        #check the Outlier function could be used since there is inheritance
        self.assertEqual(self.d3.Outliers(), ([11, 53, 58], [345000, 385000, 438780]))
        self.assertEqual(self.d4.Outliers(), ([41, 53, 66, 75], [16905, 50271, 19900, 1596]))
        
        #check
        self.assertIs(type(self.d3.DropOutliers()), type(df)) #check that it returns a dataframe
        
        #I will only do two of the dataframe equilvalence checks since it's slightly complicated to clean data
        assert_frame_equal(self.d3.DropOutliers(), nooutliers1) #test with pandas testing for dataframe
        assert_frame_equal(self.d5.DropOutliers(), nooutliers2) #test with pandas testing for dataframe
        
    @classmethod
    def tearDownClass(cls):
        print('teardownClass')
    
unittest.main(argv=[''], verbosity=2, exit=False)   

test_DropOutliers (__main__.TestOutlierdrop) ... ok
test_Outliers (__main__.TestOutlierdrop) ... ok
test_SplitTrain (__main__.TestTraining) ... 

setUpClass
Set Up
Tear Down
Set Up
Tear Down
teardownClass
setUpClass
Set Up
Tear Down
teardownClass


ok

----------------------------------------------------------------------
Ran 3 tests in 0.267s

OK


<unittest.main.TestProgram at 0x1147e1e48>

# unittest 2
* testing the training.py file from the statswrangler module

In [64]:
import statswrangler.training as tr
import pandas as pd
import unittest
from pandas.util.testing import assert_frame_equal

# Import data
* there would be data imported for the class
* the multiple test and train data imported would be used to test the SplitTrain function, which splits a dataset after shuffling into a train and test set. 

In [65]:
#since training.py uses the .sample() function it is required to set seed or df.sample(random_state=1) to set seed as 1
#I have already modified the training.py module code so that it is df.sample(random_state=1)
#or else it will always be checking a random result, which would result in a Fail 


df0 = pd.read_csv("train.csv")
df1 = df0[0:30] #dataframe (will only use a subset dataset too big)


#some data cleaning before using the dataset
df1 = df1.drop("PoolQC", axis = 1) #since mostly NaN
df1 = df1.drop("Alley", axis = 1) #since mostly NaN
df1 = df1.drop("MiscFeature", axis = 1) #since mostly NaN

df2 = df0[30:60]
df2 = df2.drop("PoolQC", axis = 1) 
df2 = df2.drop("Alley", axis = 1)
df2 = df2.drop("MiscFeature", axis = 1)

#testing data for the test class
tr1 = pd.read_csv("train1.csv", index_col="Unnamed: 0")
tt1 = pd.read_csv("test1.csv", index_col="Unnamed: 0")

tr2 = pd.read_csv("train2.csv", index_col="Unnamed: 0")
tt2 = pd.read_csv("test2.csv", index_col="Unnamed: 0")

tr3 = pd.read_csv("train3.csv", index_col="Unnamed: 0")
tt3 = pd.read_csv("test3.csv", index_col="Unnamed: 0")


# In[110]:


class TestTraining(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print("setUpClass")
        
    
    def setUp(self):
        self.together = tr.TrainingData(df1).SplitTrain()
        self.train1, self.test1 = tr.TrainingData(df1).SplitTrain()
        self.train2, self.test2 = tr.TrainingData(df2).SplitTrain()
        self.train3, self.test3 = tr.TrainingData(df2,0.8).SplitTrain()
        print('Set Up')
        
    

    def test_SplitTrain(self): #test case
        #check if the function SplitTrain() returns a tuple 
        self.assertIs(type(self.together), type((2,2)) )
        
        #check if what is inside the tuple is a dataframe
        self.assertIs(type(self.train1), type(df1) )
        
        #test with pandas testing for dataframe
        #test each training and test set
        assert_frame_equal(self.train1, tr1)
        assert_frame_equal(self.test1, tt1) 
        
        assert_frame_equal(self.train2, tr2) 
        assert_frame_equal(self.test2, tt2) 
        
        assert_frame_equal(self.train3, tr3) 
        assert_frame_equal(self.test3, tt3) 
        
    def tearDown(self):
        print('Tear Down')
        
    @classmethod
    def tearDownClass(cls):
        print('teardownClass')
unittest.main(argv=[''], verbosity=2, exit=False)   

test_DropOutliers (__main__.TestOutlierdrop) ... ok
test_Outliers (__main__.TestOutlierdrop) ... ok
test_SplitTrain (__main__.TestTraining) ... 

setUpClass
Set Up
Tear Down
Set Up
Tear Down
teardownClass
setUpClass
Set Up
Tear Down
teardownClass


ok

----------------------------------------------------------------------
Ran 3 tests in 0.276s

OK


<unittest.main.TestProgram at 0x11481abe0>

# Test suite 

In [66]:
def my_suite():
    suite = unittest.TestSuite()
    result = unittest.TestResult()
    suite.addTest(unittest.makeSuite(TestTraining))
    suite.addTest(unittest.makeSuite(TestOutlierdrop))
    runner = unittest.TextTestRunner()
    print(runner.run(suite))
my_suite()

.

setUpClass
Set Up
Tear Down
teardownClass
setUpClass
Set Up


.

Tear Down


.

Set Up
Tear Down
teardownClass
<unittest.runner.TextTestResult run=3 errors=0 failures=0>



----------------------------------------------------------------------
Ran 3 tests in 0.254s

OK
