In [58]:
import os

# Print the full path of every file found under the Kaggle input directory,
# one path per line, in os.walk order.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Dataset

In [2]:
import cudf as cd
import cupy as cn
# Read the Telco customer-churn CSV with cuDF (imported above as `cd`).
data= cd.read_csv('../input/a-dataset/WA_Fn-UseC_-Telco-Customer-Churn (1).csv')
# Preview the first rows; as the cell's last expression it is displayed.
data.head()

# Preprocessing 

In [4]:
# Parse TotalCharges as numbers; errors='coerce' turns unparsable entries into
# missing values instead of raising.
data.TotalCharges = cd.to_numeric(data.TotalCharges, errors='coerce')

# Report the number of missing values per column. This is not the cell's last
# expression, so it has to be printed explicitly to show up in the output.
print(data.isnull().sum())

# Drop every row that contains at least one missing value.
data = data.dropna(how='any')

# Bucket tenure into 12-wide, left-closed bins: [1, 13), [13, 25), ..., [61, 73).
# The labels are derived from the same edges that are passed to cut(), so the
# two cannot drift apart: "1 - 12", "13 - 24", ..., "61 - 72".
tenure_edges = range(1, 80, 12)
labels = [
    "{0} - {1}".format(lower, upper - 1)
    for lower, upper in zip(tenure_edges[:-1], tenure_edges[1:])
]

data['tenure_group'] = cd.cut(data.tenure, tenure_edges, right=False, labels=labels)

# Remove the customer identifier and the raw tenure column, which has been
# replaced by tenure_group.
data = data.drop(columns=['customerID', 'tenure'])
data.head()

In [5]:
# Encode the target as integers: 1 for 'Yes', 0 otherwise.
# The guard keeps the cell re-runnable: once Churn is numeric, comparing it
# with 'Yes' again would no longer reproduce the labels, so the encoding is
# applied only while the column is still non-numeric.
if data['Churn'].dtype.kind not in 'biuf':
    data['Churn'] = (data['Churn'] == 'Yes').astype('int64')

# One-hot encode the frame; the result is kept separately from `data`.
data_dummies = cd.get_dummies(data)
data_dummies.head()

In [6]:
# Summary statistics of `data`, displayed as the cell output.
data.describe()

In [7]:
# `df` is a second name for the same object as `data_dummies` (no copy is made).
df = data_dummies
# Show the last rows of the encoded frame.
df.tail()

In [8]:
# Target column selected by name. A single selected column has no `columns`
# attribute to rename, so the former `output_label.columns = [...]` assignment
# had no effect on the data and has been removed.
output_label = df['Churn']


In [9]:
# Select features and target by column name instead of by position.
# Positional slicing (columns 0..49 as features, column 50 as target) is only
# correct if 'Churn' happens to be the last column of the encoded frame; if it
# is not, the target leaks into the features and an unrelated column is
# predicted. Selecting by name is correct for any column order.
input_label = df.drop(columns=['Churn'])
Y = df['Churn']
# Convert the target to a CuPy array shaped as a single column: (n_rows, 1).
Y = cn.array(Y).reshape(-1, 1)

In [10]:
from tensorflow import keras

# Standardization

In [11]:
from cuml.preprocessing import StandardScaler
# Standardize the feature frame; the fitted scaler object is kept in `scaler`.
scaler=StandardScaler()
# NOTE(review): the scaler is fitted on all rows here, before the train/test
# split performed in a later cell, so held-out rows contribute to the scaling
# statistics — consider fitting on the training portion only.
X_scaled=scaler.fit_transform(input_label)
# Display the scaled features as the cell output.
X_scaled

# Splitting of Dataset

In [13]:
from cuml.model_selection import train_test_split

# Split the scaled features and the target into train and test parts
# (test_size=0.3) with a fixed random_state so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled,
    Y,
    test_size=0.3,
    random_state=100,
)

In [14]:
# Print the shape of each split, in the order: train features, test features,
# train target, test target.
for split_part in (x_train, x_test, y_train, y_test):
    print(split_part.shape)


In [15]:
# Shape of the training features without the leading (row) dimension.
x_train.shape[1:]

In [16]:
# Convert the training features with `as_gpu_matrix()`. In the visible part of
# the notebook `train_x` is referenced only by the commented-out Keras cell.
# NOTE(review): `as_gpu_matrix` appears to be deprecated/removed in newer cuDF
# releases — confirm against the installed version before relying on it.
train_x = x_train.as_gpu_matrix()
# Display the shape of the converted matrix.
train_x.shape

In [17]:
# model = keras.Sequential()
# model.add(keras.layers.Flatten())
# model.add(keras.layers.Dense(64, input_shape=(train_x.shape[1:]), activation='relu'))
# model.add(keras.layers.Dense(128, activation='relu'))
# model.add(keras.layers.Dense(128, activation='relu'))
# model.add(keras.layers.Dense(1, activation='sigmoid'))


# model.compile(loss='binary_crossentropy',metrics=['accuracy'],
# optimizer=keras.optimizers.SGD(learning_rate=0.0005))

# Classifier Model 

In [18]:
from cuml.ensemble import RandomForestClassifier as RF
from cuml.metrics.confusion_matrix import confusion_matrix
from cuml.metrics import accuracy_score

In [23]:
# Random-forest classifier (RF is cuml.ensemble.RandomForestClassifier) with a
# fixed random_state for repeatable training.
model = RF(max_features=1.0, n_bins=10, n_estimators=100, max_depth=6, random_state=42)

# Features are passed as float32. Class labels are passed as int32, the label
# dtype the cuML classifier documents for `fit`; casting them to float32 would
# depend on an implicit conversion back to int32 inside `fit`.
model.fit(x_train.astype('float32'), y_train.astype('int32'))

# Predict labels for the held-out features and display them.
prediction = model.predict(x_test.astype('float32'))
prediction

In [24]:
# Collapse the test target to one dimension (one entry per row) and show it.
y_test = y_test.reshape(-1)
print(y_test)

# Evaluation Metrics
Accuracy, R2_Score, MSE

In [56]:
from cuml.metrics import accuracy_score,r2_score,mean_squared_error

# Accuracy of the predictions against the test labels; `.get()` is called on
# the labels before they are handed to the metric.
y_test_fetched = y_test.get()
acc = accuracy_score(y_test_fetched, prediction)
print("Accuracy: ", acc, sep="")

In [47]:
# Cast the test labels to float32.
# NOTE(review): presumably done so the label dtype matches the predictions for
# the regression-style metrics computed below — confirm.
y_test=y_test.astype('float32')

In [44]:
# Store the result of `prediction.to_array()` in `pred`, used by the R2 score below.
# NOTE(review): presumably a host-side array copy; `to_array` appears to be
# deprecated in newer cuDF releases — confirm against the installed version.
pred=prediction.to_array()

In [57]:
# R2 score of the predictions against the test labels.
r2 = r2_score(y_test, pred)
# The label previously ran straight into the number (e.g. "R2_score0.12");
# a colon separator is added, matching the MSE line below.
print('R2_score:' + str(r2))

# Mean squared error of the predictions against the test labels.
mse = mean_squared_error(y_test, prediction)
print('MSE:' + str(mse))