# unittest 1

* Testing the outlierdrop.py file originaly in the statswranger module

In [8]:
import statswrangler.outlierdrop as od
import pandas as pd

In [9]:
df = pd.read_csv("train.csv")

#data cleaning for test case checking later

df = df[0:100] #dataframe (will only use a subset dataset too big)
df = df.drop("PoolQC", axis = 1)
s1 = df["SalePrice"] #series 1 using column SalePrice
s2 = df["LotArea"] #series 2 using LotArea

nooutliers1 = pd.read_csv("withnooutliers.csv", index_col = "Unnamed: 0")#this dataset has the outliers in SalePrice columnn removed
nooutliers1 = nooutliers1.drop("PoolQC", axis =1)
#will be used in our test case as correct resutlt


#threshold =2 only 2 oultliers removed
nooutliers2 = pd.read_csv("withnooutliers2.csv",index_col = "Unnamed: 0")#, index_col = "Unnamed: 0")#this dataset has the outliers in SalePrice columnn removed

In [10]:
import unittest
from pandas.util.testing import assert_frame_equal

In [11]:
class TestOutlierdrop(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print("setUpClass")
    @classmethod
    def tearDownClass(cls):
        print('teardownClass')
    def setUp(self):
        self.d1 = od.Data(s1,df)
        self.d2 = od.Data(s2,df)
        self.d3 = od.Dropped(s1,df)
        self.d4 = od.Dropped(s2,df)
        self.d5 = od.Dropped(s1,df, threshold = 2) #with threshold = 2
        print('Set Up')
        
    def tearDown(self):
        print('Tear Down')
        
    def test_Outliers(self): #test case 
        self.assertEqual(self.d1.Outliers(), ([11, 53, 58], [345000, 385000, 438780])) 
        self.assertEqual(self.d2.Outliers(), ([41, 53, 66, 75], [16905, 50271, 19900, 1596]))

    def test_DropOutliers(self): #test case
        #check the Outlier function could be used since there is inheritance
        self.assertEqual(self.d3.Outliers(), ([11, 53, 58], [345000, 385000, 438780]))
        self.assertEqual(self.d4.Outliers(), ([41, 53, 66, 75], [16905, 50271, 19900, 1596]))
        
        assert_frame_equal(self.d3.DropOutliers(), nooutliers1) #test with pandas testing for dataframe
        assert_frame_equal(self.d5.DropOutliers(), nooutliers2) #test with pandas testing for dataframe
    
    
unittest.main(argv=[''], verbosity=2, exit=False)   

test_DropOutliers (__main__.TestOutlierdrop) ... ok
test_Outliers (__main__.TestOutlierdrop) ... ok
test_SplitTrain (__main__.TestTraining) ... 

setUpClass
Set Up
Tear Down
Set Up
Tear Down
teardownClass
setUpClass
Set Up
Tear Down
teardownClass


ok

----------------------------------------------------------------------
Ran 3 tests in 0.256s

OK


<unittest.main.TestProgram at 0x10a221a90>

# unittest 2
* testing the training.py file from the statswrangler module

In [12]:
import statswrangler.training as tr
import pandas as pd
import unittest
from pandas.util.testing import assert_frame_equal


#since it uses the .sample() function it is required to set seed or df.sample(random_state=1) to set seed as 1 
#we will have to alter the module or else the test would fail 


# In[109]:


df0 = pd.read_csv("train.csv")
df1 = df0[0:30] #dataframe (will only use a subset dataset too big)


#some data cleaning before using the dataset
df1 = df1.drop("PoolQC", axis = 1) #since mostly NaN
df1 = df1.drop("Alley", axis = 1) #since mostly NaN
df1 = df1.drop("MiscFeature", axis = 1) #since mostly NaN

df2 = df0[30:60]
df2 = df2.drop("PoolQC", axis = 1) 
df2 = df2.drop("Alley", axis = 1)
df2 = df2.drop("MiscFeature", axis = 1)

#testing data for the test class
tr1 = pd.read_csv("train1.csv", index_col="Unnamed: 0")
tt1 = pd.read_csv("test1.csv", index_col="Unnamed: 0")

tr2 = pd.read_csv("train2.csv", index_col="Unnamed: 0")
tt2 = pd.read_csv("test2.csv", index_col="Unnamed: 0")

tr3 = pd.read_csv("train3.csv", index_col="Unnamed: 0")
tt3 = pd.read_csv("test3.csv", index_col="Unnamed: 0")


# In[110]:


class TestTraining(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print("setUpClass")
        
    @classmethod
    def tearDownClass(cls):
        print('teardownClass')
    def setUp(self):
        self.train1, self.test1 = tr.TrainingData(df1).SplitTrain()
        self.train2, self.test2 = tr.TrainingData(df2).SplitTrain()
        self.train3, self.test3 = tr.TrainingData(df2,0.8).SplitTrain()
        print('Set Up')
        
    def tearDown(self):
        print('Tear Down')

    def test_SplitTrain(self): #test case
        assert_frame_equal(self.train1, tr1) #test with pandas testing for dataframe
        assert_frame_equal(self.test1, tt1) 
        
        assert_frame_equal(self.train2, tr2) 
        assert_frame_equal(self.test2, tt2) 
        
        assert_frame_equal(self.train3, tr3) 
        assert_frame_equal(self.test3, tt3) 
        
unittest.main(argv=[''], verbosity=2, exit=False)   


# In[ ]:

test_DropOutliers (__main__.TestOutlierdrop) ... ok
test_Outliers (__main__.TestOutlierdrop) ... 

setUpClass
Set Up
Tear Down
Set Up


ok
test_SplitTrain (__main__.TestTraining) ... 

Tear Down
teardownClass
setUpClass
Set Up
Tear Down
teardownClass


ok

----------------------------------------------------------------------
Ran 3 tests in 0.253s

OK


<unittest.main.TestProgram at 0x10a1ffa90>

# Test suite 

In [13]:
def my_suite():
    suite = unittest.TestSuite()
    result = unittest.TestResult()
    suite.addTest(unittest.makeSuite(TestTraining))
    suite.addTest(unittest.makeSuite(TestOutlierdrop))
    runner = unittest.TextTestRunner()
    print(runner.run(suite))
my_suite()

setUpClass
Set Up


.

Tear Down
teardownClass
setUpClass
Set Up


..

Tear Down
Set Up
Tear Down
teardownClass
<unittest.runner.TextTestResult run=3 errors=0 failures=0>



----------------------------------------------------------------------
Ran 3 tests in 0.244s

OK
