In [1]:
import findspark
# Run findspark.init() before the pyspark imports in the next cell.
# NOTE(review): presumably this is what makes `pyspark` importable in this
# environment — confirm against the local Spark installation.
findspark.init()

# SparkContext Oluşturma Yöntem 1 : SparkSession

In [15]:
import os

from pyspark import SparkContext
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession

In [47]:
# Method 1: let the SparkSession builder create (or reuse) the session directly.
# Runs locally on 4 threads with explicit executor/driver memory settings.
# NOTE(review): the variable is called `pyspark`, the same name as the package
# imported from above; later cells rely on this name, so it is kept as is.
pyspark = (
    SparkSession.builder
    .master("local[4]")
    .appName("RDD-Olusturmak")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "2g")
    .getOrCreate()
)

In [48]:
# Keep a handle on the session's SparkContext under the name `sc`.
sc = pyspark.sparkContext

In [8]:
# Stop the context created by method 1 before demonstrating the next method.
sc.stop()

# SparkContext Oluşturma Yöntem 2 : SparkSession ve SparkConf

In [9]:
# Method 2: describe the application in a SparkConf, then pass it to the
# SparkSession builder.
#
# Spark properties such as spark.executor.memory / spark.driver.memory have to
# be registered with SparkConf.set(). The previous setExecutorEnv() calls only
# exported environment variables with those names to the executors, so the
# memory settings were never applied as configuration.
conf = (
    SparkConf()
    .setMaster("local[4]")
    .setAppName("RDD-Olusturmak")
    .set("spark.executor.memory", "4g")
    .set("spark.driver.memory", "4g")
)

# getOrCreate() builds the session from `conf` (or returns an already active one).
pyspark = (
    SparkSession.builder
    .config(conf=conf)
    .getOrCreate()
)

In [10]:
# Keep a handle on the SparkContext that belongs to the session built from `conf`.
sc = pyspark.sparkContext

In [46]:
# Stop the context created by method 2 before demonstrating the next method.
sc.stop()

# SparkContext Oluşturma Yöntem 3 : SparkContext ve SparkConf

In [32]:
# Method 3 (part 1): configuration for a SparkContext that is created directly,
# without going through SparkSession.
#
# Spark properties must be registered with SparkConf.set(); setExecutorEnv()
# only exports an environment variable to the executors, so the earlier calls
# did not configure executor/driver memory at all.
sparkConf = (
    SparkConf()
    .setMaster("local[4]")
    .setAppName("RDD-Olusturmak")
    .set("spark.executor.memory", "4g")
    .set("spark.driver.memory", "4g")
)

In [33]:
# Method 3 (part 2): construct the SparkContext straight from the SparkConf.
# The context from method 2 was stopped above (sc.stop()) before this one is created.
sc = SparkContext(conf=sparkConf)

# Python Listelerinden RDD oluşturmak

In [34]:
# Distribute a Python list of (name, age) tuples as an RDD.
rdd1 = sc.parallelize(
    [
        ('Ahmet', 25),
        ('Kamil', 29),
        ('Murat', 22),
        ('İbis', 18),
    ]
)

In [35]:
# Fetch only the first two elements instead of collecting the whole RDD.
rdd1.take(2)

[('Ahmet', 25), ('Kamil', 29)]

In [37]:
# Same data as rdd1, but each record is a [name, age] list rather than a tuple.
rdd2 = sc.parallelize(
    [
        ['Ahmet', 25],
        ['Kamil', 29],
        ['Murat', 22],
        ['İbis', 18],
    ]
)

In [38]:
# Fetch the first three records; they come back as lists, as they were put in.
rdd2.take(3)

[['Ahmet', 25], ['Kamil', 29], ['Murat', 22]]

In [39]:
# Number of records in the RDD (4 for the list above).
rdd2.count()

4

# Python Sözlüğünden RDD Oluşturmak

In [49]:
# Column-oriented sample data: one key per column, one list entry per row.
my_dict = dict(
    Ogrenci=['Ali', 'Mehmet', 'Ayse'],
    Notlar=[70, 80, 90],
)

In [50]:
import pandas as pd

In [51]:
# Build a pandas DataFrame from the dict: keys become columns, list items become rows.
pdDF = pd.DataFrame(data=my_dict)

In [52]:
# Preview the pandas DataFrame before handing it to Spark.
pdDF.head()

Unnamed: 0,Ogrenci,Notlar
0,Ali,70
1,Mehmet,80
2,Ayse,90


In [53]:
# Convert the pandas DataFrame into a Spark DataFrame.
#
# The session is resolved with getOrCreate() instead of reusing the `pyspark`
# variable: when the notebook is run top to bottom, `pyspark` still points at
# the session whose context was stopped with sc.stop(), while the live context
# is the one built directly from SparkContext(conf=sparkConf). getOrCreate()
# returns a session bound to whichever context is currently active.
active_session = SparkSession.builder.getOrCreate()

# NOTE: despite the `rdd_` prefix, this name holds a Spark DataFrame at this
# point; a later cell replaces it with the DataFrame's underlying RDD.
rdd_from_pandasDF = active_session.createDataFrame(pdDF)

In [55]:
# Print the Spark DataFrame as a text table (the name still holds a DataFrame here).
rdd_from_pandasDF.show()

+-------+------+
|Ogrenci|Notlar|
+-------+------+
|    Ali|    70|
| Mehmet|    80|
|   Ayse|    90|
+-------+------+



In [56]:
# Replace the DataFrame with its underlying RDD of Row objects.
# NOTE(review): this rebinds the same name, so the cell is not idempotent —
# re-running it on its own would presumably fail because the name already holds
# the RDD. Rerun the createDataFrame cell first.
rdd_from_pandasDF = rdd_from_pandasDF.rdd

In [57]:
# Fetch the first three records; each one is a Row with Ogrenci and Notlar fields.
rdd_from_pandasDF.take(3)

[Row(Ogrenci='Ali', Notlar=70),
 Row(Ogrenci='Mehmet', Notlar=80),
 Row(Ogrenci='Ayse', Notlar=90)]

# Metin Dosyalarından RDD Oluşturmak

In [60]:
# Location of the Online Retail CSV. The original absolute path is kept as the
# default so existing behavior is unchanged, but it can be overridden on other
# machines by setting the ONLINE_RETAIL_CSV environment variable.
ONLINE_RETAIL_CSV = os.environ.get(
    "ONLINE_RETAIL_CSV",
    "C:\\Users\\Muhammed\\Desktop\\Spark\\5\\OnlineRetail.csv",
)

# textFile() yields one RDD element per line of the file, as a raw string;
# the semicolon-separated fields are not parsed here.
rdd_metin = sc.textFile(ONLINE_RETAIL_CSV)

In [61]:
# Peek at the first ten lines; the first element is the CSV header row.
rdd_metin.take(10)

['InvoiceNo;StockCode;Description;Quantity;InvoiceDate;UnitPrice;CustomerID;Country',
 '536365;85123A;WHITE HANGING HEART T-LIGHT HOLDER;6;1.12.2010 08:26;2,55;17850;United Kingdom',
 '536365;71053;WHITE METAL LANTERN;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;84406B;CREAM CUPID HEARTS COAT HANGER;8;1.12.2010 08:26;2,75;17850;United Kingdom',
 '536365;84029G;KNITTED UNION FLAG HOT WATER BOTTLE;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;84029E;RED WOOLLY HOTTIE WHITE HEART.;6;1.12.2010 08:26;3,39;17850;United Kingdom',
 '536365;22752;SET 7 BABUSHKA NESTING BOXES;2;1.12.2010 08:26;7,65;17850;United Kingdom',
 '536365;21730;GLASS STAR FROSTED T-LIGHT HOLDER;6;1.12.2010 08:26;4,25;17850;United Kingdom',
 '536366;22633;HAND WARMER UNION JACK;6;1.12.2010 08:28;1,85;17850;United Kingdom',
 '536366;22632;HAND WARMER RED POLKA DOT;6;1.12.2010 08:28;1,85;17850;United Kingdom']