In [84]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import UserDefinedFunction
from pyspark.sql.types import FloatType

spark = SparkSession.Builder().appName('Spark DataFrame Introduction').getOrCreate()

dollarDF = spark.read.option('inferSchema','true')\
                        .option('header','false')\
                        .option('delimiter','\t').csv('datasets/DollarDataset.txt')

dollarDF = dollarDF.withColumnRenamed('_c0','id')
dollarDF = dollarDF.withColumnRenamed('_c1','tarih')
dollarDF = dollarDF.withColumnRenamed('_c2','fiyat')
dollarDF = dollarDF.filter(dollarDF['fiyat'].isNotNull())
dollarDF = dollarDF.drop('id')
#dollarDF.show(1000)
rdd = dollarDF.rdd.zipWithIndex()
dollarDF = rdd.map(lambda x : (x[1],x[0][0],x[0][1])).toDF()
dollarDF = dollarDF.withColumnRenamed('_1','id')
dollarDF = dollarDF.withColumnRenamed('_2','tarih')
dollarDF = dollarDF.withColumnRenamed('_3','fiyat')

def stringToFloat(f):
    f = f.replace('.','') #Noktayı siliyoruz
    f = f.replace(',','.') # , (virgül) olan yerleri . (nokta) ile değiştiriyoruz
    return float(f)

stringToFloatUDF = UserDefinedFunction(stringToFloat,FloatType())
dollarDF = dollarDF.withColumn("fiyat",stringToFloatUDF('fiyat'))
dollarDF.printSchema()

dollarDF2 = dollarDF
dollarDF2 = dollarDF2.withColumn("id",dollarDF2['id']+1)
dollarDF2 = dollarDF2.withColumnRenamed('fiyat','fiyat2')
dollarDF2 = dollarDF2.withColumnRenamed('tarih','tarih2')
dollarDF2 = dollarDF2.select('id','tarih2','fiyat2')

dollarDF2.show(2)
dollarDF.show(2)

joinedDF = dollarDF2.join(dollarDF,'id')
joinedDF = joinedDF.sort('id')

joinedDF = joinedDF.withColumn('oran', (joinedDF["fiyat"]*100/joinedDF["fiyat2"])-100)

joinedDF = joinedDF.sort('oran',ascending=False)

joinedDF.show()

root
 |-- id: long (nullable = true)
 |-- tarih: string (nullable = true)
 |-- fiyat: float (nullable = true)

+---+----------+------+
| id|    tarih2|fiyat2|
+---+----------+------+
|  1|02-01-1950|   2.8|
|  2|03-01-1950|   2.8|
+---+----------+------+
only showing top 2 rows

+---+----------+-----+
| id|     tarih|fiyat|
+---+----------+-----+
|  0|02-01-1950|  2.8|
|  1|03-01-1950|  2.8|
+---+----------+-----+
only showing top 2 rows

+-----+----------+---------+----------+---------+------------------+
|   id|    tarih2|   fiyat2|     tarih|    fiyat|              oran|
+-----+----------+---------+----------+---------+------------------+
| 2683|19-08-1960|      2.8|22-08-1960|      9.0|221.42857690246746|
| 7589|24-01-1980|     35.0|25-01-1980|     70.0|             100.0|
| 5201|07-08-1970|      9.0|10-08-1970|    14.85|              65.0|
|12912|22-02-2001| 685391.0|23-02-2001| 957879.0|39.756582738903774|
|11172|05-04-1994| 23031.84|06-04-1994| 31988.89|38.889859503259004|
| 743

In [88]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import UserDefinedFunction
from pyspark.sql.types import FloatType

spark = SparkSession.Builder().appName('Spark DataFrame Introduction').getOrCreate()

depremDF = spark.read.option('inferSchema','true')\
                        .option('header','true')\
                        .option('delimiter','\t').csv('datasets/EarthquakeDataset.txt')

depremDF = depremDF.select("Latitude","Longitude","xM","Location")
depremDF.show()

+--------+---------+---+--------------------+
|Latitude|Longitude| xM|            Location|
+--------+---------+---+--------------------+
| 37.8435|  26.7775|3.8|          EGE DENIZI|
| 36.4918|  28.7092|3.9|             AKDENIZ|
|  37.796|  26.4165|4.4|ONIKI ADALAR (AKD...|
| 38.4172|  39.1482|3.9|KAVAKKOY-SIVRICE ...|
|  36.475|  28.7852|4.2|             AKDENIZ|
|  38.505|   39.218|5.6|KAVAKTEPE- (ELAZI...|
| 38.5305|  39.2102|4.1|KAVAKTEPE- (ELAZI...|
| 38.0033|  42.9642|3.9|KORULU-CATAK (VAN...|
| 36.5213|   28.695|3.7|             AKDENIZ|
| 37.8888|  27.6075|3.7|DAGKARAAGAC-GERME...|
| 35.7737|   36.447|3.9|              SURIYE|
| 37.8967|  43.8062|4.5|AYDEMIR-BASKALE (...|
|  37.126|  28.5738|4.0|ARICILAR-ULA (MUG...|
| 37.1175|  31.0832|3.7|ETLER-SERIK (ANTA...|
| 35.7012|  26.2608|3.6|             AKDENIZ|
| 35.7032|  26.2382|4.6|             AKDENIZ|
|  35.704|  26.2952|3.8|             AKDENIZ|
|  35.691|  26.2818|3.9|             AKDENIZ|
| 38.8573|  43.5082|4.7|ERMISLER- 