In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# # Data Collecting

In [2]:
data = pd.read_excel("/content/transposed.xlsx")

# # Data Description

## Data and File Description

  Data yang digunakan merupakan rangkaian beberapa dataset yang menyatakan berbagai detail terkait dengan perolehan, pengguna, dan perkembangan berbagai layanan dari perusahaan Telkomsel per kuarter dari 2000 sampai 2020. 

File data tersedia dalam format xlsx dengan ukuran 102 KB. Data tersebut didapatkan dari GSMA Intelligence, diberikan salah seorang orang tua anggota kelompok kepada kami.

 Data yang digunakan awalnya berdimensi 84 x 179. Namun, akibat banyaknya atribut yang menjelaskan dataset berbagai yang beragam, kami memutuskan untuk membatasi analisis menjadi 7 atribut pertama (5 dataset pertama). Karena itu, data yang akan dianalisis berdimensi 84 x 7.

## Karakteristik Data

### Quarter

Atribut kategorikal nominal yang menjelaskan baris data tersebut berasal dari kuartal berapa

#### Kategori dalam atribut

In [3]:
print(len(data["Quarter"].unique()))
data["Quarter"].unique().tolist()

4


[1, 2, 3, 4]

### Year

Atribut kategorikal nominal, menunjukkan asal tahun baris data tersebut

#### Kategori dalam atribut

In [4]:
print(len(data["Year"].unique()))
data["Year"].unique().tolist()

21


[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016,
 2017,
 2018,
 2019,
 2020]

### Percentage Prepaid Connections

Atribut kuantitatif, menjelaskan persentase pengguna layanan prabayar dari total pelanggan

#### Range

In [5]:
max_ = data['Percentage prepaid connections'].max()
min_ = data['Percentage prepaid connections'].min()
print('maximum =', max_)
print('minimum =', min_)
print('range =', max_-min_)

maximum = 0.9828
minimum = 0.5285
range = 0.45430000000000004


#### Persentase data yang kosong

In [6]:
null_ = data['Percentage prepaid connections'].isna().sum()
percentage_ = 100*(null_/ len(data))
print('Jumlah data yang kosong =', null_)
print('persen =', percentage_)

Jumlah data yang kosong = 1
persen = 1.1904761904761905


### Percentage Contract Connections

Atribut kuantitatif, menjelaskan persentase pengguna layanan kontrak dari total pelanggan

#### Range

In [7]:
max_ = data['Percentage contract connections'].max()
min_ = data['Percentage contract connections'].min()
print('maximum =', max_)
print('minimum =', min_)
print('range =', max_-min_)

maximum = 0.4715
minimum = 0.0172
range = 0.4543


#### Persentase data yang kosong

In [8]:
null_ = data['Percentage contract connections'].isna().sum()
percentage_ = 100*(null_/ len(data))
print('Jumlah data yang kosong =', null_)
print('persen =', percentage_)

Jumlah data yang kosong = 1
persen = 1.1904761904761905


### Percentage 2G Connections

Atribut kuantitatif, menjelaskan persentase pelanggan yang mengggunakan koneksi 2G dari total pelanggan

#### Range

In [9]:
max_ = data['Percentage 2G connections'].max()
min_ = data['Percentage 2G connections'].min()
print('maximum =', max_)
print('minimum =', min_)
print('range =', max_-min_)

maximum = 1.0
minimum = 0.0251
range = 0.9749


#### Persentase data yang kosong

In [10]:
null_ = data['Percentage 2G connections'].isna().sum()
percentage_ = 100*(null_/ len(data))
print('Jumlah data yang kosong =', null_)
print('persen =', percentage_)

Jumlah data yang kosong = 1
persen = 1.1904761904761905


### Percentage 3G Connections

Atribut kuantitatif, menjelaskan persentase pelanggan yang mengggunakan koneksi 3G dari total pelanggan

#### Range

In [11]:
max_ = data['Percentage 3G connections'].max()
min_ = data['Percentage 3G connections'].min()
print('maximum =', max_)
print('minimum =', min_)
print('range =', max_-min_)

maximum = 0.48
minimum = 0.0002
range = 0.4798


#### Persentase data yang kosong

In [12]:
null_ = data['Percentage 3G connections'].isna().sum()
percentage_ = 100*(null_/ len(data))
print('Jumlah data yang kosong =', null_)
print('persen =', percentage_)

Jumlah data yang kosong = 27
persen = 32.142857142857146


### Percentage 4G Connections

Atribut kuantitatif, menjelaskan persentase pelanggan yang mengggunakan koneksi 4G dari total pelanggan

#### Range

In [13]:
max_ = data['Percentage 4G connections'].max()
min_ = data['Percentage 4G connections'].min()
print('maximum =', max_)
print('minimum =', min_)
print('range =', max_-min_)

maximum = 0.5565
minimum = 0.0004
range = 0.5561


#### Persentase data yang kosong

In [14]:
null_ = data['Percentage 4G connections'].isna().sum()
percentage_ = 100*(null_/ len(data))
print('Jumlah data yang kosong =', null_)
print('persen =', percentage_)

Jumlah data yang kosong = 60
persen = 71.42857142857143


# # Data Preprocessing (Data Cleansing)

#### Kondisi data sebelum di hapus beberapa kolomnya

In [15]:
data

Unnamed: 0,Quarter,Year,Percentage prepaid connections,Percentage contract connections,Percentage 2G connections,Percentage 3G connections,Percentage 4G connections,Percentage mobile broadband capable connections,Percentage smartphone connections,Percentage basic/feature phone connections,Percentage mobile data-only device connections,Prepaid % recurring revenue; cellular,Contract % recurring revenue; cellular,Voice % recurring revenue; cellular,Interconnection % recurring revenue; cellular,Roaming % recurring revenue; cellular,Non-voice % recurring revenue; cellular,Data % recurring revenue; cellular,Messaging % recurring revenue; cellular,VAS % recurring revenue; cellular,ARPU; by mobile connection,Prepaid ARPU; by mobile connection,Contract ARPU; by mobile connection,Voice ARPU; by mobile connection,Roaming ARPU; by mobile connection,Non-voice ARPU; by mobile connection,Data ARPU; by mobile connection,Messaging ARPU; by mobile connection,VAS ARPU; by mobile connection,Total Base stations,2G Base stations,3G Base stations,4G Base stations,Total Capex,Annual capex/revenue; cellular,Total cellular connections,Total mobile connections,Prepaid mobile connections,Contract mobile connections,2G connections,...,Net additions of 4G connections,Net additions of mobile broadband capable connections,Net additions of smartphone connections,Net additions of basic/feature phone connections,Net additions of mobile data-only device connections,Net additions of GSM connections,Net additions of WCDMA (Family) connections,Net additions of LTE (Family) connections,Net additions of licensed cellular IoT connections,Net additions of cellular M2M connections,Net additions of LPWA connections,Total net profit; cellular,Total cellular network coverage; by population,3G network coverage; by population,4G network coverage; by population,Total Operating free cash flow; cellular,Total opex; cellular,Cost of Sales opex; cellular,Cost of Services opex; cellular,SG&A opex; cellular,Personnel opex; cellular,Selling & Marketing opex; cellular,Administration opex; cellular,Other opex; cellular,Total Opex/revenue; annual; cellular,Total revenue; cellular,Recurring revenue; cellular,Prepaid revenue; cellular,Contract revenue; cellular,Voice revenue; cellular,Interconnection revenue; cellular,Roaming revenue; cellular,Non-voice revenue; cellular,Data revenue; cellular,Messaging revenue; cellular,VAS revenue; cellular,Non-Recurring revenue; cellular,Equipment revenue; cellular,Total SMS messages,SMS messages; per connection
0,1,2000,0.5285,0.4715,1.0000,,,,,,,,,,,,,,,,12.17,7.00,19.10,,,,,,,,,,,,,,1121451.0,592684.0,528767.0,1121451.0,...,,,,,,,,,,,,,,,,,1.299560e+07,,,,,,,,,4.297519e+07,4.092924e+07,1.351743e+07,,,,,,,,,2.045952e+06,,,
1,2,2000,0.5577,0.4423,1.0000,,,,,,,,,,,,,,,,12.17,7.00,19.10,,,,,,,,,,,,,,1285560.0,716977.0,568583.0,1285560.0,...,,,,,,164109.0,,,,,,,,,,,1.299560e+07,,,,,,,,,4.673166e+07,4.392395e+07,1.351743e+07,,,,,,,,,2.807706e+06,,,
2,3,2000,0.5782,0.4218,1.0000,,,,,,,,,,,,,,,,12.17,7.00,19.10,,,,,,,1331.0,,,,,,,1449669.0,838271.0,611398.0,1449669.0,...,,,,,,164109.0,,,,,,,,,,,1.299560e+07,,,,,,,,,5.299276e+07,4.991339e+07,1.351743e+07,,,,,,,,,3.079378e+06,,,
3,4,2000,0.6104,0.3896,1.0000,,,,,,,,,,,,,,,,12.17,7.00,19.10,,,,,,,1411.0,,,,,,,1687339.0,1029903.0,657436.0,1687339.0,...,,,,,,237670.0,,,,,,,,,,,1.766242e+07,,,,,,,,0.2793,6.064991e+07,5.724518e+07,1.813905e+07,,,,,,,,,3.404727e+06,,,
4,1,2001,0.6339,0.3661,1.0000,,,,,,,,,,,,,,,,11.28,6.66,18.96,,,,,,,1555.0,,,,,,,1930756.0,1223815.0,706941.0,1930756.0,...,,,,,,243417.0,,,,,,2.643800e+07,,,,,2.042421e+07,,,,,,,,,6.229004e+07,6.122917e+07,2.031200e+07,3.537799e+07,,,6.600054e+06,,,,,1.060872e+06,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,4,2019,0.9627,0.0373,0.1020,0.4352,0.4628,0.8980,0.6698,0.2209,0.1093,0.8641,0.0948,0.3403,,,0.6597,0.5876,0.0721,,3.06,2.66,7.53,1.01,,1.95,1.74,0.21,,212235.0,50297.0,82104.0,79834.0,3.126345e+08,0.1428,176201260.0,171105000.0,164729000.0,6376000.0,17459895.0,...,5082978.0,4007206.0,604485.0,-294862.0,-132623.0,-3830206.0,-1075772.0,5082978.0,251100.0,140175.0,110925.0,4.470673e+08,,0.8600,0.95,5.339933e+08,7.041752e+08,1.027616e+08,1.027616e+08,5.988989e+08,8.420742e+07,2.963231e+07,4.850592e+08,2.514669e+06,0.4600,1.548288e+09,1.518724e+09,1.312385e+09,1.440158e+08,5.167984e+08,,,1.001926e+09,8.923675e+08,1.095580e+08,,2.956435e+07,,1.130000e+10,22.0
80,1,2020,0.9604,0.0396,0.0311,0.4507,0.5182,0.9689,0.6804,0.2114,0.1082,0.8666,0.0976,0.3081,,,0.6919,0.6301,0.0618,,3.06,2.77,7.56,0.94,,2.12,1.93,0.19,,219323.0,50297.0,82104.0,86922.0,1.631136e+08,0.1403,167929295.0,162567000.0,156129000.0,6438000.0,5049609.0,...,5059788.0,3872286.0,-3995027.0,-3420098.0,-1122875.0,-12410286.0,-1187502.0,5059788.0,266035.0,139831.0,126204.0,4.795541e+08,,0.8602,0.95,7.760811e+08,5.848304e+08,1.077230e+08,1.077230e+08,4.771074e+08,8.631430e+07,2.983620e+07,3.609569e+08,,0.4432,1.524093e+09,1.495412e+09,1.295870e+09,1.459187e+08,4.607281e+08,,,1.034684e+09,9.422531e+08,9.243106e+07,,2.868081e+07,,1.050000e+10,21.0
81,2,2020,0.9609,0.0391,0.0251,0.4184,0.5565,0.9749,0.6945,0.1987,0.1068,0.8653,0.1024,0.2837,,,0.7163,0.6591,0.0572,,3.13,2.70,7.85,0.85,,2.15,1.98,0.17,,228066.0,50297.0,82104.0,95665.0,1.563172e+08,0.1372,165721044.0,160072000.0,153816000.0,6256000.0,4016744.0,...,4839186.0,-1462135.0,554265.0,-2562458.0,-486807.0,-1032865.0,-6301321.0,4839186.0,286749.0,146578.0,140171.0,3.842685e+08,,0.8604,0.95,6.994177e+08,6.112684e+08,1.071793e+08,1.071793e+08,5.040891e+08,9.202328e+07,3.792392e+07,3.741419e+08,,0.4296,1.466935e+09,1.439342e+09,1.245509e+09,1.474140e+08,4.083278e+08,,,1.031014e+09,9.486417e+08,8.237239e+07,,2.759339e+07,,9.100000e+09,18.0
82,3,2020,0.9629,0.0371,0.0484,0.4013,0.5503,0.9516,0.7093,0.1854,0.1053,0.8654,0.1038,0.2815,,,0.7185,0.6589,0.0597,,2.99,2.48,7.71,0.78,,1.98,1.81,0.16,,228441.0,50323.0,77928.0,100190.0,1.699100e+08,0.1349,176072910.0,170117000.0,163809000.0,6308000.0,8235616.0,...,4532988.0,5826128.0,9496205.0,-265019.0,813814.0,4218872.0,1293140.0,4532988.0,306866.0,151133.0,155733.0,3.606171e+08,,0.8605,0.95,6.509593e+08,6.149384e+08,8.162478e+07,8.162478e+07,5.333136e+08,9.283885e+07,4.254547e+07,3.979293e+08,,0.4211,1.435808e+09,1.405768e+09,1.216488e+09,1.459187e+08,3.956865e+08,,,1.010081e+09,9.262136e+08,8.386760e+07,,3.004010e+07,,1.130000e+10,22.0


#### Menghapus attribut yang tidak diperlukan

In [16]:
dropped = [0 for i in range(172)]
j = 7
for i in range(172):   
  dropped[i] = j
  j += 1
data.drop(data.columns[dropped], axis=1, inplace=True)

#### Kondisi data setelah dihapus beberapa kolom

In [17]:
data

Unnamed: 0,Quarter,Year,Percentage prepaid connections,Percentage contract connections,Percentage 2G connections,Percentage 3G connections,Percentage 4G connections
0,1,2000,0.5285,0.4715,1.0000,,
1,2,2000,0.5577,0.4423,1.0000,,
2,3,2000,0.5782,0.4218,1.0000,,
3,4,2000,0.6104,0.3896,1.0000,,
4,1,2001,0.6339,0.3661,1.0000,,
...,...,...,...,...,...,...,...
79,4,2019,0.9627,0.0373,0.1020,0.4352,0.4628
80,1,2020,0.9604,0.0396,0.0311,0.4507,0.5182
81,2,2020,0.9609,0.0391,0.0251,0.4184,0.5565
82,3,2020,0.9629,0.0371,0.0484,0.4013,0.5503


#### Mengecek nama variable columns

In [18]:
data.columns

Index(['Quarter', 'Year', 'Percentage prepaid connections',
       'Percentage contract connections', 'Percentage 2G connections',
       'Percentage 3G connections', 'Percentage 4G connections'],
      dtype='object')

#### Mengubah nama-nama variables collumn menjadi tidak ada spasi

In [19]:
data.columns = ["quarter","year","percentage_prepaid_connections","percentage_contract_connections","percentage_2G_connections","percentage_3G_connections","percentage_4G_connections"]

In [20]:
data.columns

Index(['quarter', 'year', 'percentage_prepaid_connections',
       'percentage_contract_connections', 'percentage_2G_connections',
       'percentage_3G_connections', 'percentage_4G_connections'],
      dtype='object')

In [21]:
data

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
0,1,2000,0.5285,0.4715,1.0000,,
1,2,2000,0.5577,0.4423,1.0000,,
2,3,2000,0.5782,0.4218,1.0000,,
3,4,2000,0.6104,0.3896,1.0000,,
4,1,2001,0.6339,0.3661,1.0000,,
...,...,...,...,...,...,...,...
79,4,2019,0.9627,0.0373,0.1020,0.4352,0.4628
80,1,2020,0.9604,0.0396,0.0311,0.4507,0.5182
81,2,2020,0.9609,0.0391,0.0251,0.4184,0.5565
82,3,2020,0.9629,0.0371,0.0484,0.4013,0.5503


#### Mengubah nilai pada setiap cell percentage menjadi bilangan percentage skala 100%

Angka percentage di dalam cell data perlu dikalikan 100, karena ketika dioutputkan di ipynb angka tercovert menjadi dibagi 100

In [22]:
for i in range(84):
  # mengalikan 100 value dalam cell yang memiliki nilai pada kolom percentage_prepaid_connections
  if pd.isnull(data.loc[i, "percentage_prepaid_connections"]) == False:
    data.loc[i, "percentage_prepaid_connections"] = data.loc[i, "percentage_prepaid_connections"] * 100
  
  # mengalikan 100 value dalam cell yang memiliki nilai pada kolom percentage_contract_connections
  if pd.isnull(data.loc[i, "percentage_contract_connections"]) == False:
    data.loc[i, "percentage_contract_connections"] = data.loc[i, "percentage_contract_connections"] * 100
  
  # mengalikan 100 value dalam cell yang memiliki nilai pada kolom percentage_2G_connections
  if pd.isnull(data.loc[i, "percentage_2G_connections"]) == False:
    data.loc[i, "percentage_2G_connections"] = data.loc[i, "percentage_2G_connections"] * 100
  
  # mengalikan 100 value dalam cell yang memiliki nilai pada kolom percentage_3G_connections
  if pd.isnull(data.loc[i, "percentage_3G_connections"]) == False:
    data.loc[i, "percentage_3G_connections"] = data.loc[i, "percentage_3G_connections"] * 100
  
  # mengalikan 100 value dalam cell yang memiliki nilai pada kolom percentage_4G_connections
  if pd.isnull(data.loc[i, "percentage_4G_connections"]) == False:
    data.loc[i, "percentage_4G_connections"] = data.loc[i, "percentage_4G_connections"] * 100

#### Kondisi data setelah value dalam cell kolom percentage dikalikan 100

In [23]:
data

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
0,1,2000,52.85,47.15,100.00,,
1,2,2000,55.77,44.23,100.00,,
2,3,2000,57.82,42.18,100.00,,
3,4,2000,61.04,38.96,100.00,,
4,1,2001,63.39,36.61,100.00,,
...,...,...,...,...,...,...,...
79,4,2019,96.27,3.73,10.20,43.52,46.28
80,1,2020,96.04,3.96,3.11,45.07,51.82
81,2,2020,96.09,3.91,2.51,41.84,55.65
82,3,2020,96.29,3.71,4.84,40.13,55.03


# # Data Observation (Statistic and Visualization)

## Statistik

### Sampel Data

#### Data pertama

In [24]:
data[0:1]

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
0,1,2000,52.85,47.15,100.0,,


Pada sampel data pertama ini mengambil data line pertama pada dataset. Ada beberapa atribut yang belum ada nilainya, yaitu percentage_3G_connections dan percentage_4G_connections, dimana ini menjukkan pada waktu tersebut belum ada 3g dan 4g

#### Data pertama saat 4g mulai masuk dalam percantage

**Mencari index baris saat semua 2g 3g dan 4g sudah memiliki data (tidak ada yang NaN/Null)**

In [25]:
idx = 0 #sebagai index terbesar sementara
i = 0
while pd.isnull(data.loc[i,"percentage_4G_connections"]) == True: #loop untuk mengecek percentage 4g yang memiliki value NaN sampai menemukan cell yang tidak NaN
  i += 1

idx = i

i = 0
while pd.isnull(data.loc[i,"percentage_3G_connections"]) == True: #loop untuk mengecek percentage 3g yang memiliki value NaN sampai menemukan cell yang tidak NaN
  i += 1

if i > idx:
  idx = i

i = 0
while pd.isnull(data.loc[i,"percentage_2G_connections"]) == True: #loop untuk mengecek percentage 2g yang memiliki value NaN sampai menemukan cell yang tidak NaN
  i += 1

if i > idx:
  idx = i

**Menggunakan idx yang ditemukan untuk mengoutputkan data yang dicari**

In [26]:
data[idx:idx+1]

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
59,4,2014,97.97,2.03,75.19,24.77,0.04


Semua data mulai tidak null berada pada index ke 59 dataset, (mulai pada line ke 60 pada dataset)

#### Data terakhir pengambilan percentage

**Mencari index baris dataset dimana pada baris tersebut adalah terakhir kalinya data diambil oleh telkomsel (baris sebelum NaN terakhir)**

In [27]:
idx = 0 #sebagai index terbesar sementara
i = 59
while pd.isnull(data.loc[i,"percentage_4G_connections"]) == False: #loop untuk mengecek percentage 4g yang memiliki value sampai menemukan cell yang NaN
  i += 1

idx = i

i = 59
while pd.isnull(data.loc[i,"percentage_3G_connections"]) == False: #loop untuk mengecek percentage 3g yang memiliki value sampai menemukan cell yang NaN
  i += 1

if i > idx:
  idx = i

i = 59
while pd.isnull(data.loc[i,"percentage_2G_connections"]) == False: #loop untuk mengecek percentage 2g yang memiliki value sampai menemukan cell yang NaN
  i += 1

if i > idx:
  idx = i

**Menggunakan index yang didapat untuk mencari data dimana bari percentage terakhir diambil**

In [28]:
data[idx-1:idx]

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
82,3,2020,96.29,3.71,4.84,40.13,55.03


data terakhir yang diambil telkomsel berda pada index data 82, yang berarti index 83 data percentage NULL semua (NaN)

#### 5 data terbesar berdasarkan percentage_prepaid_conections

In [29]:
sorted_ppc = data.sort_values(["percentage_prepaid_connections"], ascending=[0])
sorted_ppc.head()

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
51,4,2012,98.28,1.72,83.28,16.72,
50,3,2012,98.23,1.77,83.38,16.62,
49,2,2012,98.19,1.81,83.41,16.59,
52,1,2013,98.16,1.84,82.4,17.6,
53,2,2013,98.14,1.86,82.18,17.82,


In [30]:
sorted_ppc["percentage_prepaid_connections"]

51    98.28
50    98.23
49    98.19
52    98.16
53    98.14
      ...  
3     61.04
2     57.82
1     55.77
0     52.85
83      NaN
Name: percentage_prepaid_connections, Length: 84, dtype: float64

#### 5 data terbesar berdasarkan percentage_contract_conections

In [31]:
sorted_pcc = data.sort_values(["percentage_contract_connections"], ascending=[0])
sorted_pcc.head()

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
0,1,2000,52.85,47.15,100.0,,
1,2,2000,55.77,44.23,100.0,,
2,3,2000,57.82,42.18,100.0,,
3,4,2000,61.04,38.96,100.0,,
4,1,2001,63.39,36.61,100.0,,


In [32]:
sorted_pcc["percentage_contract_connections"]

0     47.15
1     44.23
2     42.18
3     38.96
4     36.61
      ...  
52     1.84
49     1.81
50     1.77
51     1.72
83      NaN
Name: percentage_contract_connections, Length: 84, dtype: float64

#### 5 data terbesar berdasarkan percentage_2G_connections

In [33]:
sorted_p2c = data.sort_values(["percentage_2G_connections"], ascending=[0])
sorted_p2c.head()

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
0,1,2000,52.85,47.15,100.0,,
14,3,2003,88.74,11.26,100.0,,
25,2,2006,94.64,5.36,100.0,,
24,1,2006,94.31,5.69,100.0,,
23,4,2005,93.94,6.06,100.0,,


In [34]:
sorted_p2c["percentage_2G_connections"]

0     100.00
14    100.00
25    100.00
24    100.00
23    100.00
       ...  
79     10.20
82      4.84
80      3.11
81      2.51
83       NaN
Name: percentage_2G_connections, Length: 84, dtype: float64

#### 5 data terbesar berdasarkan percentage_3G_connections

In [35]:
sorted_p3c = data.sort_values(["percentage_3G_connections"], ascending=[0])
sorted_p3c.head()

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
75,4,2018,96.69,3.31,15.0,48.0,37.0
73,2,2018,97.02,2.98,20.27,47.35,32.38
74,3,2018,96.74,3.26,17.86,47.21,34.93
76,1,2019,96.62,3.38,15.8,45.89,38.31
77,2,2019,96.46,3.54,13.17,45.59,41.24


In [36]:
sorted_p3c["percentage_3G_connections"]

75    48.00
73    47.35
74    47.21
76    45.89
77    45.59
      ...  
22      NaN
23      NaN
24      NaN
25      NaN
83      NaN
Name: percentage_3G_connections, Length: 84, dtype: float64

#### 5 data terbesar berdasarkan percentage_4G_connections

In [37]:
sorted_p4c = data.sort_values(["percentage_4G_connections"], ascending=[0])
sorted_p4c.head()

Unnamed: 0,quarter,year,percentage_prepaid_connections,percentage_contract_connections,percentage_2G_connections,percentage_3G_connections,percentage_4G_connections
81,2,2020,96.09,3.91,2.51,41.84,55.65
82,3,2020,96.29,3.71,4.84,40.13,55.03
80,1,2020,96.04,3.96,3.11,45.07,51.82
79,4,2019,96.27,3.73,10.2,43.52,46.28
78,3,2019,96.39,3.61,12.46,44.19,43.35


In [38]:
sorted_p4c["percentage_4G_connections"]

81    55.65
82    55.03
80    51.82
79    46.28
78    43.35
      ...  
55      NaN
56      NaN
57      NaN
58      NaN
83      NaN
Name: percentage_4G_connections, Length: 84, dtype: float64

### Statistik Deskriptif

#### Rata-Rata, Standar Deviasi, dan Koefisien Varians



##### Rata-Rata, Standar Deviasi, dan Koefisien Varians dari kolom **percentage_prepaid_connections**

In [39]:
# Rata-Rata percentage_prepaid_connections
data["percentage_prepaid_connections"].mean()

92.03192771084339

In [40]:
# Standar Deviasi percentage_prepaid_connections
data["percentage_prepaid_connections"].std()

10.888232343024562

In [41]:
# koefisien varians percentage_prepaid_connections
data["percentage_prepaid_connections"].mean() / data["percentage_prepaid_connections"].std()

8.452421367532878

##### Rata-Rata, Standar Deviasi, dan Koefisien Varians dari kolom **percentage_contract_connections**

In [42]:
# Rata-Rata percentage_contract_connections
data["percentage_contract_connections"].mean()

7.968072289156624

In [43]:
# Standar Deviasi percentage_contract_connections
data["percentage_contract_connections"].std()

10.88823234302456

In [44]:
# koefisien varians percentage_prepaid_connections
data["percentage_contract_connections"].mean() / data["percentage_contract_connections"].std()

0.7318058650963021

##### Rata-Rata, Standar Deviasi, dan Koefisien Varians dari kolom **percentage_2G_connections**

In [45]:
# Rata-Rata percentage_2G_connections
data["percentage_2G_connections"].mean()

77.7796385542169

In [46]:
# Standar Deviasi percentage_2G_connections
data["percentage_2G_connections"].std()

30.11580969806727

In [47]:
# koefisien varians percentage_2G_connections
data["percentage_2G_connections"].mean() / data["percentage_2G_connections"].std()

2.582684620935446

##### Rata-Rata, Standar Deviasi, dan Koefisien Varians dari kolom **percentage_3G_connections**

In [48]:
# Rata-Rata percentage_3G_connections
data["percentage_3G_connections"].mean()

22.398070175438598

In [49]:
# Standar Deviasi percentage_3G_connections
data["percentage_3G_connections"].std()

16.46870196535109

In [50]:
# koefisien varians percentage_3G_connections
data["percentage_3G_connections"].mean() / data["percentage_3G_connections"].std()

1.3600385885033592

##### Rata-Rata, Standar Deviasi, dan Koefisien Varians dari kolom **percentage_4G_connections**

In [51]:
# Rata-Rata percentage_4G_connections
data["percentage_4G_connections"].mean()

23.65041666666667

In [52]:
# Standar Deviasi percentage_4G_connections
data["percentage_4G_connections"].std()

19.51658057166124

In [53]:
# koefisien varians percentage_4G_connections
data["percentage_4G_connections"].mean() / data["percentage_4G_connections"].std()

1.2118114943253893

#### Percentile

##### Percentil kolom percentage_prepaid_connections

In [57]:
data["percentage_prepaid_connections"].quantile([0.1, 0.25, 0.5, 0.75, 0.9])

0.10    76.340
0.25    92.845
0.50    96.690
0.75    97.720
0.90    98.086
Name: percentage_prepaid_connections, dtype: float64

##### Percentil kolom percentage_contract_connections

In [58]:
data["percentage_contract_connections"].quantile([0.1, 0.25, 0.5, 0.75, 0.9])

0.10     1.914
0.25     2.280
0.50     3.310
0.75     7.155
0.90    23.660
Name: percentage_contract_connections, dtype: float64

##### Percentil kolom percentage_2G_connections

In [59]:
data["percentage_2G_connections"].quantile([0.1, 0.25, 0.5, 0.75, 0.9])

0.10     18.342
0.25     69.315
0.50     91.820
0.75    100.000
0.90    100.000
Name: percentage_2G_connections, dtype: float64

##### Percentil kolom percentage_3G_connections

In [60]:
data["percentage_3G_connections"].quantile([0.1, 0.25, 0.5, 0.75, 0.9])

0.10     1.440
0.25     7.380
0.50    19.080
0.75    39.660
0.90    44.542
Name: percentage_3G_connections, dtype: float64

##### Percentil kolom percentage_4G_connections

In [61]:
data["percentage_4G_connections"].quantile([0.1, 0.25, 0.5, 0.75, 0.9])

0.10     0.5560
0.25     3.3175
0.50    23.2700
0.75    39.0425
0.90    50.1580
Name: percentage_4G_connections, dtype: float64

#### Ekstremum

##### Nilai Maximum dan Minimum percentage_prepaid_connections

In [62]:
data["percentage_prepaid_connections"].max()

98.28

In [63]:
data["percentage_prepaid_connections"].min()

52.849999999999994

##### Nilai Maximum dan Minimum percentage_contract_connections

In [64]:
data["percentage_contract_connections"].max()

47.15

In [65]:
data["percentage_contract_connections"].min()

1.72

##### Nilai Maximum dan Minimum percentage_2G_connections

In [66]:
data["percentage_2G_connections"].max()

100.0

In [67]:
data["percentage_2G_connections"].min()

2.5100000000000002

##### Nilai Maximum dan Minimum percentage_3G_connections

In [68]:
data["percentage_3G_connections"].max()

48.0

In [69]:
data["percentage_3G_connections"].min()

0.02

##### Nilai Maximum dan Minimum percentage_4G_connections

In [70]:
data["percentage_4G_connections"].max()

55.65

In [71]:
data["percentage_4G_connections"].min()

0.04

#### Distribusi Frekuensi

## Data Visualization