<a href="https://colab.research.google.com/github/WKhisa/Unit-Testing-for-Telecommunication-Billing-Data-Pipeline/blob/main/Unit_Testing_for_Telecommunication_Billing_Data_Pipeline_IPP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Below is the starting code that includes the data pipeline functions. You should focus on writing unit tests for these functions using the unittest framework.



In [None]:
import os
import tempfile
import unittest

import pandas as pd

# Raw GitHub URL of the sample billing CSV. Nothing in the visible code reads
# this global: data_extraction() receives its own ``file_path`` argument, which
# shadows this name inside the function.
file_path = 'https://raw.githubusercontent.com/WKhisa/Unit-Testing-for-Telecommunication-Billing-Data-Pipeline/main/billing_data.csv'

def data_extraction(file_path):
    """Load the billing CSV found at ``file_path`` into a DataFrame.

    ``file_path`` is handed straight to ``pandas.read_csv`` and the parsed
    DataFrame is returned unchanged.
    """
    return pd.read_csv(file_path)

def data_transformation(data):
    """Clean the billing records and add a ``total_charges`` column.

    Steps performed:
      1. Drop rows that are exact duplicates of an earlier row.
      2. Convert ``billing_amount`` to float, removing the ``$`` symbol when
         the column holds strings (an already-numeric column is accepted).
      3. Add ``total_charges`` as ``billing_amount + tax_amount``.

    Args:
        data: DataFrame containing ``billing_amount`` and ``tax_amount``
            columns.

    Returns:
        A new DataFrame with the cleaned columns; ``data`` itself is left
        untouched.

    Raises:
        KeyError: If ``billing_amount`` or ``tax_amount`` is missing.
        ValueError: If a billing amount is not numeric once ``$`` is removed.
    """
    # Work on an explicit copy: assigning columns to the object returned by
    # drop_duplicates() can trigger pandas' SettingWithCopyWarning.
    cleaned = data.drop_duplicates().copy()

    amounts = cleaned['billing_amount']
    if not pd.api.types.is_numeric_dtype(amounts):
        # regex=False makes '$' a literal character. As a regular expression
        # it is the end-of-string anchor, and the default for ``regex`` has
        # changed between pandas versions.
        amounts = amounts.str.replace('$', '', regex=False)
    cleaned['billing_amount'] = amounts.astype(float)

    cleaned['total_charges'] = cleaned['billing_amount'] + cleaned['tax_amount']
    return cleaned

def data_loading(data, output_file):
    """Write ``data`` to ``output_file`` as CSV, leaving out the index column."""
    data.to_csv(path_or_buf=output_file, index=False)

class TestDataPipeline(unittest.TestCase):
    """Unit tests for the extract, transform and load pipeline steps.

    Every test is hermetic: fixtures are built in memory or written to a
    per-test scratch directory, so no network access is needed.
    """

    def setUp(self):
        # A fresh scratch directory per test. addCleanup removes it even when
        # an assertion fails, so no stray files are left behind.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self.scratch_dir = scratch.name

    @staticmethod
    def _sample_frame():
        """Return five raw billing rows with '$'-prefixed string amounts."""
        return pd.DataFrame({
            'billing_amount': ['$100', '$200', '$300', '$400', '$500'],
            'tax_amount': [10, 20, 30, 40, 50],
        })

    def test_data_extraction(self):
        """data_extraction loads a CSV file into a DataFrame."""
        # Use a local fixture with a known row count instead of a remote file.
        source = self._sample_frame()
        csv_path = os.path.join(self.scratch_dir, 'billing_data.csv')
        source.to_csv(csv_path, index=False)

        extracted_data = data_extraction(csv_path)

        self.assertIsInstance(extracted_data, pd.DataFrame)
        self.assertEqual(len(extracted_data), 5)
        self.assertListEqual(list(extracted_data.columns), list(source.columns))

    def test_data_transformation(self):
        """data_transformation cleans amounts and computes total charges."""
        data = self._sample_frame()

        transformed_data = data_transformation(data)

        # 'billing_amount' is stripped of '$' and converted to float
        self.assertAlmostEqual(transformed_data['billing_amount'].iloc[0], 100.0)
        self.assertAlmostEqual(transformed_data['billing_amount'].iloc[4], 500.0)

        # 'tax_amount' is carried over unchanged
        self.assertAlmostEqual(transformed_data['tax_amount'].iloc[0], 10.0)
        self.assertAlmostEqual(transformed_data['tax_amount'].iloc[4], 50.0)

        # 'total_charges' is billing_amount + tax_amount
        self.assertAlmostEqual(transformed_data['total_charges'].iloc[0], 110.0)
        self.assertAlmostEqual(transformed_data['total_charges'].iloc[4], 550.0)

        # Exact duplicate rows are dropped
        with_duplicate = pd.concat([data, data.iloc[[0]]], ignore_index=True)
        self.assertEqual(len(data_transformation(with_duplicate)), 5)

    def test_data_loading(self):
        """data_loading writes the DataFrame to a CSV file."""
        data = self._sample_frame()
        output_file = os.path.join(self.scratch_dir, 'test_output.csv')

        data_loading(data, output_file)

        # The file must exist and round-trip to the same rows and columns
        self.assertTrue(os.path.exists(output_file))
        reloaded = pd.read_csv(output_file)
        self.assertListEqual(list(reloaded.columns), list(data.columns))
        self.assertEqual(reloaded.to_dict('list'), data.to_dict('list'))

if __name__ == '__main__':
    # This code lives in a notebook cell. There sys.argv holds the kernel's
    # own arguments, which unittest would try to load as test names, and the
    # default sys.exit() at the end of the run would raise SystemExit inside
    # the kernel. Pass an explicit argv and keep the interpreter alive.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)