In [26]:
import openpyxl
import pandas as pd


def read_excel(filename, nrows):
    """Read the top rows of the first worksheet of an Excel workbook.

    The workbook is opened in openpyxl's read-only mode, so rows are pulled
    lazily and no more rows than requested are materialised. The workbook is
    always closed before returning, even if reading fails part-way.

    Parameters
    ----------
    filename : str or file-like object
        Path to excel file.
    nrows : int
        Total number of worksheet rows to read, starting at the top and
        *including* the header row. At most ``nrows - 1`` data rows end up in
        the result; ``nrows <= 1`` yields the column labels with no data rows.

    Returns
    -------
    pd.DataFrame
        Column labels are constructed from the first row of the excel
        worksheet. If the worksheet yields no rows at all, an empty
        DataFrame (no columns, no rows) is returned.

    """
    # Parameter `read_only=True` leads to excel rows only being loaded as-needed
    book = openpyxl.load_workbook(filename=filename, read_only=True, data_only=True)
    try:
        first_sheet = book.worksheets[0]
        rows_generator = iter(first_sheet.values)

        # A sentinel avoids leaking StopIteration when the sheet has no rows.
        header_row = next(rows_generator, None)
        if header_row is None:
            return pd.DataFrame()

        # `range` comes first in `zip` so that no extra row is pulled from the
        # generator once the requested number of rows has been collected.
        data_rows = [row for (_, row) in zip(range(nrows - 1), rows_generator)]
    finally:
        # Read-only workbooks keep the underlying file open until closed explicitly.
        book.close()

    return pd.DataFrame(data_rows, columns=list(header_row))


# Load only the top 300 worksheet rows (header row included) for a quick look.
dframe = read_excel(filename='turknetchurnekimanonim.xlsx', nrows=300)
dframe.head(n=5)

Unnamed: 0,NaN,ILTELKODU,ILADI,POSILCE,ILCE,CINSIYET,DURUM,ABONEBAS,CLOSE_DATE,KALDIGI_AY_SAYISI,...,DENVERGIRISTARIHI_3,DENVERCIKISTARIHI_3,DENVERKALDIGISURE_GUN_3,IKNATICKET_3,PORTERROR_SAYISI_3,MAX_SESSIONTIME_3,MIN_SESSIONTIME_3,TOTALUPLOADGB_3,TOTALDOWNLOADGB_3,CHURNTEST_DATE
0,0,212,İstanbul (Avrupa),939,SULTANGAZİ,E,A,2016-04-20,NaT,54,...,,,0,0,10,87108,0,16.5013,272.1967,2020-10-31
1,1,212,İstanbul (Avrupa),420,SARIYER,K,A,2016-04-20,NaT,54,...,,,0,0,1,90271,0,135.8656,185.5825,2020-10-31
2,2,262,Kocaeli,998,İZMİT,E,A,2016-04-21,NaT,54,...,,,0,0,3,90112,0,1.8255,46.6458,2020-10-31
3,3,212,İstanbul (Avrupa),418,KÜÇÜKÇEKMECE,E,A,2016-04-21,NaT,54,...,,,0,0,15,87588,1664,2.0288,42.0181,2020-10-31
4,4,212,İstanbul (Avrupa),425,BAYRAMPAŞA,E,A,2016-04-21,2020-10-02,54,...,,,0,0,0,0,0,0.0,0.0,2020-10-31


In [27]:
# Show every column label as a plain Python list.
list(dframe.columns)

[None,
 'ILTELKODU',
 'ILADI',
 'POSILCE',
 'ILCE',
 'CINSIYET',
 'DURUM',
 'ABONEBAS',
 'CLOSE_DATE',
 'KALDIGI_AY_SAYISI',
 'ODEMESEKLI',
 'ACIKLAMA',
 'FATURAGONDERIMTIPI',
 'RISKLIMUSTERI',
 'YAPA_VAE',
 'KAPASITE',
 'currentDown',
 'ARKADASINIGETIR',
 'ARKADASINIGETIRILEGELENLER',
 'FATURA_GECIKME_1',
 'FATURA_GECIKME_UCRETI_1',
 'CAGRIMERKEZIARAMASAYISI_1',
 'DESTEKSAYISI_1',
 'TDU_DESTEKSAYISI_1',
 'ADSLARIZA_DESTEKSAYISI_1',
 'INTERNETEBAGLANAMIYORUM_DESTEKSAYISI_1',
 'CM_DESTEKSAYISI_1',
 'HIZMETKANALI_SIKAYETSAYISI_1',
 'FATURAINCELEME_DESTEKSAYISI_1',
 'OIMSIKAYET_DESTEKSAYISI_1',
 'CM_TICKETKAPANMASURESI_1',
 'CM_TICKETSL_1',
 'HIZMETKANALI_TICKETKAPANMASURESI_1',
 'HIZMETKANALI_TICKETSL_1',
 'FATURAINCELEME_DESTEKKAPANMASURESI_1',
 'FATURAINCELEME_TICKETSL_1',
 'OIMSIKAYET_DESTEKKAPANMASURESI_1',
 'OIMSIKAYET_TICKETSL_1',
 'TDU_TICKETKAPANMASURESI_1',
 'TDU_TICKETSL_1',
 'ADSLARIZA_TICKETKAPANMASURESI_1',
 'ADSLARIZA_TICKETSL_1',
 'INTERNETEBAGLANAMIYORUM_TICKETKAPANMASURE

In [29]:
# Number of columns in the loaded frame.
len(dframe.columns)

125

In [30]:
# Count how many rows carry each distinct DURUM value (raw labels, before encoding).
dframe['DURUM'].value_counts()

A    281
K     18
Name: DURUM, dtype: int64

In [31]:
# Assign outcome as 0 if DURUM=A and 1 for any other value.
# An already-encoded 0 is kept as 0 so that re-running this cell does not
# flip every label to 1 (the raw 'A' labels are gone after the first run).
dframe['DURUM'] = [0 if x in ('A', 0) else 1 for x in dframe['DURUM']]
y = dframe['DURUM']
# Name the axis by keyword: passing it positionally (`drop('DURUM', 1)`) was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
df = dframe.drop(columns='DURUM')

In [33]:
# Preview the first rows of the encoded outcome series.
y.head()

0    0
1    0
2    0
3    0
4    0
Name: DURUM, dtype: int64

In [34]:
# Preview the first rows of the frame with the DURUM column dropped.
df.head()

Unnamed: 0,NaN,ILTELKODU,ILADI,POSILCE,ILCE,CINSIYET,ABONEBAS,CLOSE_DATE,KALDIGI_AY_SAYISI,ODEMESEKLI,...,DENVERGIRISTARIHI_3,DENVERCIKISTARIHI_3,DENVERKALDIGISURE_GUN_3,IKNATICKET_3,PORTERROR_SAYISI_3,MAX_SESSIONTIME_3,MIN_SESSIONTIME_3,TOTALUPLOADGB_3,TOTALDOWNLOADGB_3,CHURNTEST_DATE
0,0,212,İstanbul (Avrupa),939,SULTANGAZİ,E,2016-04-20,NaT,54,K,...,,,0,0,10,87108,0,16.5013,272.1967,2020-10-31
1,1,212,İstanbul (Avrupa),420,SARIYER,K,2016-04-20,NaT,54,K,...,,,0,0,1,90271,0,135.8656,185.5825,2020-10-31
2,2,262,Kocaeli,998,İZMİT,E,2016-04-21,NaT,54,K,...,,,0,0,3,90112,0,1.8255,46.6458,2020-10-31
3,3,212,İstanbul (Avrupa),418,KÜÇÜKÇEKMECE,E,2016-04-21,NaT,54,B,...,,,0,0,15,87588,1664,2.0288,42.0181,2020-10-31
4,4,212,İstanbul (Avrupa),425,BAYRAMPAŞA,E,2016-04-21,2020-10-02,54,K,...,,,0,0,0,0,0,0.0,0.0,2020-10-31
