# Testing

In [20]:
import unittest
import os
import pandas as pd
from itertools import islice

In [23]:
class PTestCases(unittest.TestCase):
    """Checks for loading ``raw_data.csv`` and for each clean-up step applied to it.

    Every test reads ``raw_data.csv`` from the current working directory, and
    ``test_write_to_csv`` additionally writes ``test_data.csv`` there. The
    shared load / filter / encode steps live in private helpers so the four
    tests cannot drift apart. The helpers are plain methods (not ``setUp``)
    so the tests also work when called directly on an instance.
    """

    _RAW_PATH = 'raw_data.csv'
    _OUTPUT_PATH = 'test_data.csv'

    # Columns parsed as pandas categoricals when the raw file is read.
    _CATEGORY_DTYPES = {'category': 'category', 'main_category': 'category', 'state': 'category'}

    # Header expected in the raw file once the first column is used as index.
    _RAW_COLUMNS = ['name', 'category', 'main_category', 'currency', 'deadline', 'goal', 'launched',
                    'pledged', 'state', 'backers', 'country', 'usd pledged', 'usd_pledged_real',
                    'usd_goal_real']

    # Columns kept by the filter step, in output order.
    _KEPT_COLUMNS = ['category', 'main_category', 'deadline', 'goal', 'launched', 'pledged',
                     'state', 'backers']

    def _load_raw(self):
        """Read the raw CSV, using its first column as the index."""
        return pd.read_csv(self._RAW_PATH, dtype=self._CATEGORY_DTYPES, index_col=0)

    def _us_finished(self, raw):
        """Return the US rows whose state is successful or failed, restricted to the kept columns.

        The result is an explicit copy so later column assignments modify an
        independent frame instead of a selection of ``raw`` (which would
        otherwise be liable to trigger ``SettingWithCopyWarning``).
        """
        us_rows = raw.query('country == "US"').loc[:, self._KEPT_COLUMNS]
        finished = us_rows.query('state == "successful" or state == "failed"')
        return finished.copy()

    def _encode_state(self, frame):
        """Replace the remaining ``state`` categories with the labels 0 and 1, in place.

        Unused categories are dropped first so exactly the categories still
        present are renamed; ``rename_categories`` assigns the new labels
        positionally, in the existing category order.
        """
        remaining = frame['state'].cat.remove_unused_categories()
        frame['state'] = remaining.cat.rename_categories([0, 1])
        return frame

    def test_for_file(self):
        """The raw file has the expected header and row count."""
        raw = self._load_raw()
        # Compare only the first 14 column labels, as the check always did.
        header = list(raw.columns[:14])
        self.assertEqual(header, self._RAW_COLUMNS, 'not working!')
        self.assertEqual(len(raw), 378661, 'Not working')

    def test_for_filter(self):
        """Filtering leaves exactly the kept columns, in order."""
        filtered = self._us_finished(self._load_raw())
        self.assertEqual(list(filtered.columns), self._KEPT_COLUMNS)

    def test_for_rename_categories(self):
        """After encoding, the ``state`` categories are exactly [0, 1]."""
        encoded = self._encode_state(self._us_finished(self._load_raw()))
        self.assertEqual(encoded['state'].cat.categories.tolist(), [0, 1], 'Error!')

    def test_write_to_csv(self):
        """The fully processed frame can be written to ``test_data.csv``."""
        data = self._encode_state(self._us_finished(self._load_raw()))
        data = data.rename(columns={'category': 'Sub_Category', 'main_category': 'Main_Category'})

        # Parse each date column once and derive everything from the parsed
        # values, instead of re-parsing the formatted strings for every column.
        launched = pd.to_datetime(data['launched'])
        deadline = pd.to_datetime(data['deadline'])

        # Store launched and deadline in yyyymmdd format.
        data['launched'] = launched.dt.strftime('%Y%m%d')
        data['deadline'] = deadline.dt.strftime('%Y%m%d')

        # Derived columns (same names and order as before).
        data['ratio'] = data['pledged'] * 100 / data['goal']
        data['launched_month'] = launched.dt.month.astype('category')
        data['deadline_month'] = deadline.dt.month.astype('category')
        data['launched_year'] = launched.dt.year.astype('category')
        data['deadline_year'] = deadline.dt.year.astype('category')
        # Whole calendar days between the two dates: drop the time of day
        # first, then subtract as datetime64 so ``.dt.days`` is always valid.
        data['days'] = (deadline.dt.normalize() - launched.dt.normalize()).dt.days

        data.to_csv(self._OUTPUT_PATH)

        # The output file is intentionally left in place after the check.
        self.assertTrue(os.path.isfile(self._OUTPUT_PATH), 'not working!')


In [22]:
# Run each check by hand on one instance, in a fixed order; the first
# failing assertion (or I/O error) raises and stops the remaining checks.
t1 = PTestCases()
for check in (
    t1.test_for_file,
    t1.test_for_filter,
    t1.test_for_rename_categories,
    t1.test_write_to_csv,
):
    check()