In [32]:
import pyspark

In [33]:
from pyspark.sql import SparkSession

In [34]:
spark = SparkSession.builder.appName('vehicle_ins').getOrCreate()

In [35]:
spark.conf.set("spark.sql.repl.eagerEval", True)

In [36]:
df_vehicle = spark.read.csv('vehicle_ins.csv', inferSchema = True, header = True)

In [37]:
from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

In [38]:
df_vehicle.show()

+-----+-----------+---------+---------+------------+----------------+------------+------------------+------+-------------+---+-------------+--------------------+---------------+---------------+------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek|     Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed|   Sex|MaritalStatus|Age|        Fault|          PolicyType|VehicleCategory|   VehiclePrice|FraudFound_P|PolicyNumber|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+---------+------------+----------------+------------+--------

In [39]:
print((df_vehicle.count(), len(df_vehicle.columns)))

(15420, 33)


Checking for Null values

In [69]:
from pyspark.sql.functions import isnull, when, count, col

df_vehicle.select([count(when(isnan(c) | col(c).isNull(), c)).alias(c) for c in df_vehicle.columns]).show()

+-----+-----------+---------+----+------------+----------------+------------+------------------+---+-------------+---+-----+----------+---------------+------------+------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek|Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed|Sex|MaritalStatus|Age|Fault|PolicyType|VehicleCategory|VehiclePrice|FraudFound_P|PolicyNumber|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+----+------------+----------------+------------+------------------+---+-------------+---+-----+----------+-------------

See all the distinct values in every column

In [41]:
for column in df_vehicle: 
    df_vehicle.select(column).distinct().show(100)

+-----+
|Month|
+-----+
|  Oct|
|  Sep|
|  Dec|
|  Aug|
|  May|
|  Jun|
|  Feb|
|  Nov|
|  Mar|
|  Jan|
|  Apr|
|  Jul|
+-----+

+-----------+
|WeekOfMonth|
+-----------+
|          1|
|          3|
|          5|
|          4|
|          2|
+-----------+

+---------+
|DayOfWeek|
+---------+
|Wednesday|
|  Tuesday|
|   Friday|
| Thursday|
| Saturday|
|   Monday|
|   Sunday|
+---------+

+---------+
|     Make|
+---------+
|    Lexus|
|   Jaguar|
|   Saturn|
|   Nisson|
|   Porche|
|Chevrolet|
|     Saab|
|    Honda|
|  Pontiac|
|  Mercury|
|      BMW|
|   Accura|
|  Ferrari|
|       VW|
|  Mecedes|
|   Toyota|
|    Dodge|
|    Mazda|
|     Ford|
+---------+

+------------+
|AccidentArea|
+------------+
|       Urban|
|       Rural|
+------------+

+----------------+
|DayOfWeekClaimed|
+----------------+
|       Wednesday|
|         Tuesday|
|               0|
|          Friday|
|        Thursday|
|        Saturday|
|          Monday|
|          Sunday|
+----------------+

+------------+

Checking the rows with zeroes.

In [42]:
df_vehicle.where(df_vehicle.DayOfWeekClaimed == '0').show()
df_vehicle.where(df_vehicle.MonthClaimed == '0').show()

+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+-------------+---+-------------+------------------+---------------+---------------+------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek| Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed| Sex|MaritalStatus|Age|        Fault|        PolicyType|VehicleCategory|   VehiclePrice|FraudFound_P|PolicyNumber|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+----

In [43]:
df_vehicle.where(df_vehicle.Age == 0).show()
df_vehicle.where(df_vehicle.Age == 0).count()

+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+-------------+---+-------------+------------------+---------------+---------------+------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek| Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed| Sex|MaritalStatus|Age|        Fault|        PolicyType|VehicleCategory|   VehiclePrice|FraudFound_P|PolicyNumber|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+----

320

In [44]:
df_vehicle.where(df_vehicle.MonthClaimed == '0').show()

+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+-------------+---+-------------+------------------+---------------+---------------+------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek| Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed| Sex|MaritalStatus|Age|        Fault|        PolicyType|VehicleCategory|   VehiclePrice|FraudFound_P|PolicyNumber|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+-----+------------+----------------+------------+------------------+----+----

Monthclaimed and DayOfWeekClaimed has a value of zero in the same row. We can remove this row from the dataset.
There are 320 rows with value of Age column as zero. The ages may be recorded as zeroes in these cases, either 
because the exact ages of the drivers were not known or the drivers were under age. Either way we can remove the
rows containing age as zero.

In [45]:
df = df_vehicle.filter(df_vehicle.MonthClaimed != 0)
df = df_vehicle.filter(df_vehicle.Age != 0)

Number of rows after removing rows with zero values

In [46]:
df.count()

15100

We can drop the PolciyNumber column as it may not be useful in the model if they are all unique numbers.

In [47]:
from pyspark.sql.functions import countDistinct
df.select(countDistinct("PolicyNumber")).show()

+----------------------------+
|count(DISTINCT PolicyNumber)|
+----------------------------+
|                       15100|
+----------------------------+



Dropping the Policy Number column as they are basically Policy IDs and there are no duplicates.

In [48]:
df = df.drop("PolicyNumber")
df.show()

+-----+-----------+---------+---------+------------+----------------+------------+------------------+------+-------------+---+-------------+--------------------+---------------+---------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+
|Month|WeekOfMonth|DayOfWeek|     Make|AccidentArea|DayOfWeekClaimed|MonthClaimed|WeekOfMonthClaimed|   Sex|MaritalStatus|Age|        Fault|          PolicyType|VehicleCategory|   VehiclePrice|FraudFound_P|RepNumber|Deductible|DriverRating|Days_Policy_Accident|Days_Policy_Claim|PastNumberOfClaims|AgeOfVehicle|AgeOfPolicyHolder|PoliceReportFiled|WitnessPresent|AgentType|NumberOfSuppliments|AddressChange_Claim|NumberOfCars|Year|BasePolicy|
+-----+-----------+---------+---------+------------+----------------+------------+------------------+------+--------

In [49]:
from pyspark.ml.feature import StringIndexer, OneHotEncoder

SI_Month = StringIndexer(inputCol='Month',outputCol='Month_Index')
SI_DayOfWeek = StringIndexer(inputCol='DayOfWeek',outputCol='DayOfWeek_Index')
SI_Make = StringIndexer(inputCol='Make',outputCol='Make_Index')
SI_AccidentArea = StringIndexer(inputCol='AccidentArea',outputCol='AccidentArea_Index')
SI_DayOfWeekClaimed = StringIndexer(inputCol='DayOfWeekClaimed',outputCol='DayOfWeekClaimed_Index')
SI_MonthClaimed = StringIndexer(inputCol='MonthClaimed',outputCol='MonthClaimed_Index')
SI_Sex = StringIndexer(inputCol='Sex',outputCol='Sex_Index')
SI_MaritalStatus = StringIndexer(inputCol='MaritalStatus',outputCol='MaritalStatus_Index')
SI_Fault = StringIndexer(inputCol='Fault',outputCol='Fault_Index')
SI_PolicyType = StringIndexer(inputCol='PolicyType',outputCol='PolicyType_Index')
SI_VehicleCategory = StringIndexer(inputCol='VehicleCategory',outputCol='VehicleCategory_Index')
SI_VehiclePrice = StringIndexer(inputCol='VehiclePrice',outputCol='VehiclePrice_Index')
SI_Days_Policy_Accident = StringIndexer(inputCol='Days_Policy_Accident',outputCol='Days_Policy_Accident_Index')
SI_Days_Policy_Claim = StringIndexer(inputCol='Days_Policy_Claim',outputCol='Days_Policy_Claim_Index')
SI_PastNumberOfClaims = StringIndexer(inputCol='PastNumberOfClaims',outputCol='PastNumberOfClaims_Index')
SI_AgeOfVehicle = StringIndexer(inputCol='AgeOfVehicle',outputCol='AgeOfVehicle_Index')
SI_AgeOfPolicyHolder = StringIndexer(inputCol='AgeOfPolicyHolder',outputCol='AgeOfPolicyHolder_Index')
SI_PoliceReportFiled = StringIndexer(inputCol='PoliceReportFiled',outputCol='PoliceReportFiled_Index')
SI_WitnessPresent = StringIndexer(inputCol='WitnessPresent',outputCol='WitnessPresent_Index')
SI_AgentType = StringIndexer(inputCol='AgentType',outputCol='AgentType_Index')
SI_NumberOfSuppliments = StringIndexer(inputCol='NumberOfSuppliments',outputCol='NumberOfSuppliments_Index')
SI_AddressChange_Claim = StringIndexer(inputCol='AddressChange_Claim',outputCol='AddressChange_Claim_Index')
SI_NumberOfCars = StringIndexer(inputCol='NumberOfCars',outputCol='NumberOfCars_Index')
SI_BasePolicy = StringIndexer(inputCol='BasePolicy',outputCol='BasePolicy_Index')


df = SI_Month.fit(df).transform(df)
df = SI_DayOfWeek.fit(df).transform(df)
df = SI_Make.fit(df).transform(df)
df = SI_AccidentArea.fit(df).transform(df)
df = SI_DayOfWeekClaimed.fit(df).transform(df)
df = SI_MonthClaimed.fit(df).transform(df)
df = SI_Sex.fit(df).transform(df)
df = SI_MaritalStatus.fit(df).transform(df)
df = SI_Fault.fit(df).transform(df)
df = SI_PolicyType.fit(df).transform(df)
df = SI_VehicleCategory.fit(df).transform(df)
df = SI_VehiclePrice.fit(df).transform(df)
df = SI_Days_Policy_Accident.fit(df).transform(df)
df = SI_Days_Policy_Claim.fit(df).transform(df)
df = SI_PastNumberOfClaims.fit(df).transform(df)
df = SI_AgeOfVehicle.fit(df).transform(df)
df = SI_AgeOfPolicyHolder.fit(df).transform(df)
df = SI_PoliceReportFiled.fit(df).transform(df)
df = SI_WitnessPresent.fit(df).transform(df)
df = SI_AgentType.fit(df).transform(df)
df = SI_NumberOfSuppliments.fit(df).transform(df)
df = SI_AddressChange_Claim.fit(df).transform(df)
df = SI_NumberOfCars.fit(df).transform(df)
df = SI_BasePolicy.fit(df).transform(df)


In [50]:
OHE = OneHotEncoder(inputCols=['Month_Index', 'DayOfWeek_Index','Make_Index','AccidentArea_Index','DayOfWeekClaimed_Index','MonthClaimed_Index','Sex_Index','MaritalStatus_Index','Fault_Index','PolicyType_Index','VehicleCategory_Index','VehiclePrice_Index','Days_Policy_Accident_Index','Days_Policy_Claim_Index','PastNumberOfClaims_Index','AgeOfVehicle_Index','AgeOfPolicyHolder_Index', 'PoliceReportFiled_Index','WitnessPresent_Index','AgentType_Index','NumberOfSuppliments_Index','AddressChange_Claim_Index','NumberOfCars_Index','BasePolicy_Index' ],outputCols=['Month_OHE', 'DayOfWeek_OHE','Make_OHE','AccidentArea_OHE','DayOfWeekClaimed_OHE','MonthClaimed_OHE','Sex_OHE','MaritalStatus_OHE','Fault_OHE','PolicyType_OHE','VehicleCategory_OHE','VehiclePrice_OHE','Days_Policy_Accident_OHE','Days_Policy_Calim','PastNumberOfClaims_OHE','AgeOfVehicle_OHE','AgeOfPolicyHolder_OHE', 'PolicyReportFiled_OHE','WitnessPresent_OHE','AgentType_OHE','NumberOfSuppliments_OHE','AddressChange_Claim_OHE','NumberOfCars_OHE','BasePolicy_OHE' ])
df = OHE.fit(df).transform(df)
df.select('DayOfWeek','DayOfWeek_OHE').show()

+---------+-------------+
|DayOfWeek|DayOfWeek_OHE|
+---------+-------------+
|Wednesday|(6,[4],[1.0])|
|Wednesday|(6,[4],[1.0])|
|   Friday|(6,[1],[1.0])|
| Saturday|(6,[5],[1.0])|
|   Monday|(6,[0],[1.0])|
|   Friday|(6,[1],[1.0])|
| Saturday|(6,[5],[1.0])|
| Saturday|(6,[5],[1.0])|
|  Tuesday|(6,[2],[1.0])|
|   Sunday|    (6,[],[])|
|   Monday|(6,[0],[1.0])|
|   Friday|(6,[1],[1.0])|
|   Monday|(6,[0],[1.0])|
|  Tuesday|(6,[2],[1.0])|
| Thursday|(6,[3],[1.0])|
|   Sunday|    (6,[],[])|
| Thursday|(6,[3],[1.0])|
|   Monday|(6,[0],[1.0])|
|   Friday|(6,[1],[1.0])|
| Saturday|(6,[5],[1.0])|
+---------+-------------+
only showing top 20 rows



In [51]:
df.show()

+-----+-----------+---------+---------+------------+----------------+------------+------------------+------+-------------+---+-------------+--------------------+---------------+---------------+------------+---------+----------+------------+--------------------+-----------------+------------------+------------+-----------------+-----------------+--------------+---------+-------------------+-------------------+------------+----+----------+-----------+---------------+----------+------------------+----------------------+------------------+---------+-------------------+-----------+----------------+---------------------+------------------+--------------------------+-----------------------+------------------------+------------------+-----------------------+-----------------------+--------------------+---------------+-------------------------+-------------------------+------------------+----------------+--------------+-------------+--------------+----------------+--------------------+---------

In [52]:
from pyspark.ml.feature import VectorAssembler

assembler = VectorAssembler(inputCols=['Month_Index','WeekOfMonth', 'DayOfWeek_Index','Make_Index','AccidentArea_Index','DayOfWeekClaimed_Index','WeekOfMonthClaimed','MonthClaimed_Index','Sex_Index','MaritalStatus_Index','Age','Fault_Index','PolicyType_Index','VehicleCategory_Index','VehiclePrice_Index','RepNumber','Deductible','DriverRating','Days_Policy_Accident_Index','Days_Policy_Claim_Index','PastNumberOfClaims_Index','AgeOfVehicle_Index','AgeOfPolicyHolder_Index', 'PoliceReportFiled_Index','WitnessPresent_Index','AgentType_Index','NumberOfSuppliments_Index','AddressChange_Claim_Index','NumberOfCars_Index','BasePolicy_Index','Month_OHE', 'DayOfWeek_OHE','Make_OHE','AccidentArea_OHE','DayOfWeekClaimed_OHE','MonthClaimed_OHE','Sex_OHE','MaritalStatus_OHE','Fault_OHE','PolicyType_OHE','VehicleCategory_OHE','VehiclePrice_OHE','Days_Policy_Accident_OHE','Days_Policy_Calim','PastNumberOfClaims_OHE','AgeOfVehicle_OHE','AgeOfPolicyHolder_OHE', 'PolicyReportFiled_OHE','WitnessPresent_OHE','AgentType_OHE','NumberOfSuppliments_OHE','AddressChange_Claim_OHE','NumberOfCars_OHE','BasePolicy_OHE'],outputCol='features')
                                       
df_final = assembler.transform(df)                                     

In [53]:
df_final.select('features','FraudFound_P').show()

+--------------------+------------+
|            features|FraudFound_P|
+--------------------+------------+
|(142,[0,1,2,3,5,6...|           0|
|(142,[1,2,3,6,7,9...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,3,4,5...|           0|
|(142,[1,3,5,6,7,8...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,3,6,7...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,3,6,7,1...|           0|
|(142,[1,2,3,5,6,7...|           0|
|(142,[1,3,5,6,7,1...|           0|
|(142,[0,1,2,3,6,7...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[0,1,2,6,9,1...|           0|
|(142,[0,1,3,5,6,1...|           0|
|(142,[0,1,2,3,5,6...|           0|
|(142,[1,2,3,6,7,1...|           0|
+--------------------+------------+
only showing top 20 rows



In [54]:
model_df = df_final.select(['features','FraudFound_P'])
model_df = model_df.withColumnRenamed("FraudFound_P","label")

In [55]:
model_df.printSchema()

root
 |-- features: vector (nullable = true)
 |-- label: integer (nullable = true)



In [56]:
training_df,test_df = model_df.randomSplit([0.80,0.20])

In [57]:
df_class_0 = training_df[training_df['label'] == 0]
df_class_1 = training_df[training_df['label'] == 1]
count_0 = df_class_0.count()
count_1 = df_class_1.count() 
ratio = count_0/count_1
df_1_oversampled = df_class_1.sample(withReplacement=True, fraction=2*ratio, seed=1)
model_df= df_class_0.unionAll(df_1_oversampled)


                                                                                

In [58]:
model_df_class_0 = model_df[training_df['label'] == 0]
model_df_class_1 = model_df[training_df['label'] == 1]
count_00 = model_df_class_0.count()
count_11 = model_df_class_1.count() 
new_ratio = count_00/count_11
print(new_ratio)

0.5027518863737239


In [64]:
from pyspark.ml.classification import LogisticRegression
log_reg=LogisticRegression(featuresCol = 'features', labelCol ='label')
lr_model = log_reg.fit(model_df)
lr_summary=lr_model.summary
print('Accuracy:',lr_summary.accuracy)
print('Precision:',lr_summary.weightedPrecision)
print('Recall:',lr_summary.weightedRecall)



Accuracy: 0.836134329680716
Precision: 0.8429723776018316
Recall: 0.836134329680716


In [65]:
predictions = lr_model.transform(test_df)
predictions.select('label','prediction').show(50)

+-----+----------+
|label|prediction|
+-----+----------+
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       1.0|
|    0|       1.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       1.0|
|    0|       0.0|
|    0|       0.0|
|    1|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       0.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|     

In [67]:
from pyspark.ml.classification import RandomForestClassifier
rf = RandomForestClassifier(featuresCol = 'features', labelCol = 'label')
rfModel = rf.fit(model_df)
rf_summary= rfModel.summary
print('Accuracy:',rf_summary.accuracy)
print('Precision:', rf_summary.weightedPrecision)
print('Recall:',rf_summary.weightedRecall)

                                                                                

Accuracy: 0.8313494993649763
Precision: 0.8400650512022263
Recall: 0.8313494993649763


                                                                                

In [68]:
predictions = rfModel.transform(test_df)
predictions.select('label','prediction').show(50)

+-----+----------+
|label|prediction|
+-----+----------+
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       1.0|
|    0|       1.0|
|    0|       0.0|
|    0|       0.0|
|    0|       1.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       1.0|
|    0|       0.0|
|    0|       0.0|
|    1|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    1|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       1.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|       0.0|
|    0|     