update build.gradle
radibnia77 committed Feb 24, 2021
1 parent 641776e commit 2e83de05a521341d7f22ab90721bc716bdda5c69
Showing 8 changed files with 332 additions and 26 deletions.
@@ -0,0 +1,5 @@

IMService/gradlew
IMService/gradlew.bat
IMService/gradle/wrapper/gradle-wrapper.jar
IMService/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
/.gradle/
**/build/**
**/out/**
**/.idea/**
IMService/gradle/**
IMService/gradle*
@@ -1,5 +1,5 @@
plugins {
id "org.sonarqube" version "2.6"
id "io.spring.dependency-management" version "1.0.4.RELEASE"
}

group 'com.bluemarlin.ims.imsservice'
@@ -8,48 +8,53 @@ def versionName() {
return '1.0.0.0'
}

sonarqube {
properties {
property 'sonar.projectName', 'BlueMarlin'
}
}

subprojects {
-    apply plugin: "java"
+    apply plugin: 'java'
apply plugin: 'io.spring.dependency-management'

sourceCompatibility = 1.8
targetCompatibility = 1.8
apply plugin: 'project-report'

-    [compileJava, compileTestJava, javadoc]*.options*.encoding = 'UTF-8'
-    ext {
-        commonsLang3Version = '3.4'
-        junitVersion = '4.12'
-    }
+    buildscript {
+        repositories {
+            maven { url "https://plugins.gradle.org/m2/" }
+        }
+    }

+    [compileJava, javadoc]*.options*.encoding = 'UTF-8'

configurations {
compile.exclude group: 'ch.qos.logback'
}
repositories {
        maven { url 'https://repo1.maven.org/maven2/' }
-        maven { url 'https://maven.google.com' }
+        maven { url "https://maven.google.com" }
maven { url "https://plugins.gradle.org/m2/" }
}

dependencies {
-        testCompile group: 'junit', name: 'junit', version: "${junitVersion}"
+        testCompile group: 'junit', name: 'junit', version: '4.12'
-        testCompile group: 'org.mockito', name: 'mockito-core', version: '1.9.+'
+        testCompile group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.3.3'
testCompile group: 'org.hamcrest', name: 'hamcrest-library', version: '1.3'
testCompile group: 'org.hamcrest', name: 'hamcrest-core', version: '1.3'
testCompile group: 'org.springframework', name: 'spring-test', version: '4.3.8.RELEASE'

compile group: 'javax.ws.rs', name: 'javax.ws.rs-api', version: '2.1'
compile group: 'org.apache.commons', name: 'commons-compress', version: '1.16.1'
compile group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: '2.9.6'
compile group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.9.6'
-        compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '6.2.0'
+        compile group: 'org.elasticsearch.client', name: 'elasticsearch-rest-high-level-client', version: '6.2.4'
compile group: 'org.elasticsearch', name: 'elasticsearch', version: '6.2.0'
}
}


project(':imsservice') {
apply plugin: 'java'
apply plugin: 'war'

-    war.baseName = 'imsservice'
+    war.archiveBaseName = 'imsservice'
project.webAppDirName = 'src/main/webapp'

sourceSets {
main {
java {
@@ -59,15 +64,15 @@ project(':imsservice') {
}

dependencies {
-        compile group: 'org.springframework', name: 'spring-web', version: "4.2.4.RELEASE"
-        compile group: 'org.springframework', name: 'spring-webmvc', version: '4.2.4.RELEASE'
-        compile group: 'org.springframework', name: 'spring-context', version: '4.2.4.RELEASE'
-        compile group: 'org.springframework', name: 'spring-context-support', version: '4.2.4.RELEASE'
-        compile group: 'org.springframework', name: 'spring-jms', version: '4.2.4.RELEASE'
+        compile group: 'org.springframework', name: 'spring-web', version: "4.3.4.RELEASE"
+        compile group: 'org.springframework', name: 'spring-webmvc', version: '4.3.4.RELEASE'
+        compile group: 'org.springframework', name: 'spring-context', version: '4.3.4.RELEASE'
+        compile group: 'org.springframework', name: 'spring-context-support', version: '4.3.4.RELEASE'
+        compile group: 'org.springframework', name: 'spring-jms', version: '4.3.4.RELEASE'
compile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1'
compile group: 'org.codehaus.jackson', name: 'jackson-mapper-asl', version: '1.9.13'
compile group: 'org.glassfish.jersey.core', name: 'jersey-common', version: '2.29'
-        compile group: 'org.apache.commons', name: 'commons-lang3', version: "${commonsLang3Version}"
+        compile group: 'org.apache.commons', name: 'commons-lang3', version: '3.4'
compile group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.11.0'
compile group: 'com.lmax', name: 'disruptor', version: '3.4.2'
compile group: 'org.apache.commons', name: 'commons-collections4', version: '4.2'
@@ -76,10 +81,12 @@ project(':imsservice') {
}

test {
exclude '**/*'
testLogging {
events "passed", "skipped", "failed"
}
}

}


@@ -0,0 +1,8 @@
1. The data is generated for 1000 users (a sample user record is sketched after this list).
2. Each user has a specific profile.
3. Each user has specific interests.
4. Each user has a specific level of activity.
5. Each day has a specific level of activity.
6. Traffic over the whole year follows a pattern.
7. Traffic within each day follows a pattern.
8. Media attributes (si, t, m) follow specific patterns.
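
For reference, a minimal sketch of the user record the generator presumably expects in users.txt; the field names come from the script below, while the sample values are assumptions (users.txt itself is not part of this commit):

# users.txt (hypothetical content): a Python-literal list of user dicts.
# 'activity' is h/m/l (high/medium/low); 'g' is gender, 'a' an age bucket,
# 'r' a region code; all of them end up in the uckey.
[
    {'id': 'u0001', 'activity': 'h', 'g': 'g_f', 'a': '2', 'r': '3'},
    {'id': 'u0002', 'activity': 'm', 'g': 'g_m', 'a': '4', 'r': '1'},
]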
@@ -0,0 +1,230 @@
from datetime import datetime
from datetime import timedelta
import random

# Uncomment these two imports if push_to_es is set to True in __main__ below.
# from elasticsearch import Elasticsearch
# from elasticsearch import helpers

# The following imports are for Spark/Hive; comment them out if nothing is pushed to Hive.
from pyspark import SparkContext, SparkConf, Row
from pyspark.sql import HiveContext

import ast


def read_users():
    # users.txt holds a Python-literal list of user dicts; parse it without eval.
    with open('users.txt', 'r') as f:
        users = ast.literal_eval(f.read())
    return users


def calculate_loop_size(datetime_record):
    # datetime.weekday(): 0 is Monday, 6 is Sunday; weekend traffic is
    # heavier, with Sunday the heaviest.
    day_of_week_dic = {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 3, 6: 5}
hour_dic = {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 2, 7: 3, 8: 4, 9: 5, 10: 5, 11: 5, 12: 6, 13: 6, 14: 5, 15: 5,
16: 4, 17: 4, 18: 4, 19: 4, 20: 3, 21: 3, 22: 3, 23: 2}
holidays = ['2018-01-01', '2018-01-15', '2018-02-19', '2018-03-31', '2018-05-28', '2018-07-04', '2018-09-03',
'2018-11-22', '2018-11-23', '2018-12-25']

base_loop_size_for_each_user = 2

day_of_week_factor = day_of_week_dic[datetime_record.weekday()]
hour_factor = hour_dic[datetime_record.hour]
holiday_str = datetime.strftime(datetime_record, '%Y-%m-%d')
holiday_factor = 1
if holiday_str in holidays:
holiday_factor = 3

return int(base_loop_size_for_each_user * day_of_week_factor * hour_factor * holiday_factor)
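
# Worked example: Sunday 2018-01-07 at 12:00 is not a holiday, so the
# per-user loop size is 2 (base) * 5 (Sunday) * 6 (noon hour) * 1 = 60.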


def is_user_active_enough(user):
    # Higher activity level -> higher chance of generating traffic this hour.
    r = random.randint(1, 20)
    if user['activity'] == 'h':
        return True
    if user['activity'] == 'm' and r % 2 == 0:
        return True
    if user['activity'] == 'l' and r % 5 == 0:
        return True
    return False
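
# Resulting per-hour odds: 'h' users always generate traffic, 'm' users with
# probability 10/20 = 50%, and 'l' users with 4/20 = 20%.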


def generate_traffic_for_user(user):
traffic = user.copy()
traffic.pop('id', None)
traffic.pop('activity', None)
r = random.randint(1, 20)

    # Smaller si values are more likely.
si = '1'
if r % 3 == 0:
si = '2'
if r % 5 == 0:
si = '3'
traffic['si'] = si

    # If gender is male or unknown, m = magazinelock; females get cloudFolder.
    m = 'magazinelock'
    if user['g'] == 'g_f':
        m = 'cloudFolder'
    traffic['m'] = m

    # More 4G usage than 3G.
    t = '4G'
    if r % 3 == 0:
        t = '3G'
    traffic['t'] = t

return traffic


def create_new_ucdoc(uckey, traffic):
ucdoc = traffic.copy()
ucdoc['uckey'] = uckey
ucdoc['days'] = {}
return ucdoc


def create_empty_hours():
    # One counter dict per hour of the day: four count buckets h0..h3
    # plus a 'total' field.
    doc = {'h0': 0, 'h1': 0, 'h2': 0, 'h3': 0, 'total': 0}
    # doc = {'total': 0}
    hours_list = []
    for _ in range(24):
        hours_list.append(doc.copy())
    return hours_list


def add_to_ucdocs(ucdocs, date, traffic):
uckey_list = [traffic['m'], traffic['si'], traffic['t'],
traffic['g'], traffic['a'], 'pt', traffic['r'], 'icc']
uckey = ','.join(uckey_list)
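    # Example uckey (illustrative): 'magazinelock,1,4G,g_f,2,pt,3,icc'
    # 'pt' and 'icc' are fixed placeholder fields in this generator.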
date_str = datetime.strftime(date, '%Y-%m-%d')
if uckey not in ucdocs:
ucdoc = create_new_ucdoc(uckey, traffic)
ucdocs[uckey] = ucdoc
ucdoc = ucdocs[uckey]
if date_str not in ucdoc['days']:
ucdoc['days'][date_str] = create_empty_hours()

    # Deterministic increments per bucket (0/100/200/300), presumably to make
    # the generated counts easy to verify downstream.
    ucdoc['days'][date_str][date.hour]['h0'] += 0
    ucdoc['days'][date_str][date.hour]['h1'] += 100
    ucdoc['days'][date_str][date.hour]['h2'] += 200
    ucdoc['days'][date_str][date.hour]['h3'] += 300

    # ucdoc['days'][date_str][date.hour]['total'] += 1


def push_ucdocs_to_hive(ucdocs, bucket_size):
data_list = []
for ucdoc in ucdocs:
for day in ucdoc['days'].keys():
for hour in range(24):
                uckey = ucdoc['uckey']
                # Stable assignment of each uckey to one of bucket_size buckets.
                bucket_id = hash(uckey) % bucket_size
                # Encode the four hourly counters as 'index:count' strings.
                count_array = []
                for i in range(4):
                    count = ucdoc['days'][day][hour]['h' + str(i)]
                    count_array.append(str(i) + ':' + str(count))

                # Python 2 str: decode to unicode for the Spark Row.
                row = Row(uckey=uckey.decode('utf-8'),
                          bucket_id=bucket_id,
                          count_array=count_array,
                          hour=hour,
                          day=str(day))
# print(row)
data_list.append(row)

df = hive_context.createDataFrame(data_list)

# Spark 1.6
# df.select('uckey',
# 'bucket_id',
# 'count_array',
# 'hour',
# 'day'
# ).write.option("header", "true").option("encoding", "UTF-8").mode('append').partitionBy("day").saveAsTable("factdata")

# Spark 2.3
df.select('uckey',
'bucket_id',
'count_array',
'hour',
'day'
).write.format('hive').option("header", "true").option("encoding", "UTF-8").mode('append').insertInto("factdata")
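
# A row produced above looks like this (values illustrative):
# Row(uckey=u'magazinelock,1,4G,g_f,2,pt,3,icc', bucket_id=17,
#     count_array=['0:0', '1:100', '2:200', '3:300'], hour=12, day='2018-01-07')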


if __name__ == '__main__':

users = read_users()

start_date_str = '2018-01-01'
end_date_str = '2018-02-01'
start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
end_date = datetime.strptime(end_date_str, '%Y-%m-%d')

    # Full value domains for media and connection type, kept for reference.
    m_list = ['cloudFolder', 'magazinelock', 'minusonepage']
    t_list = ['3G', '4G', 'Wifi', '5G']

ucdocs = {}

# Loop over time
date = start_date
while date < end_date:
loop_size = calculate_loop_size(date)
print(date)

for _ in range(loop_size):
for user in users:
if is_user_active_enough(user):
traffic = generate_traffic_for_user(user)
add_to_ucdocs(ucdocs, date, traffic)

date = date + timedelta(hours=1)

    # Push the generated ucdocs to either Elasticsearch or Hive.
push_to_es = False
if push_to_es:

es_host = "10.193.217.111"
es_port = 9200
es_index = 'predictions_test_generated_11142019'
es_type = 'doc'
es = Elasticsearch([{'host': es_host, 'port': es_port}], timeout=600)

print("Pushing docs:" + str(len(ucdocs.values())))
actions = []
for ucdoc in ucdocs.values():
one_doc = {'_index': es_index, '_type': es_type,
'uckey': ucdoc['uckey'], 'ucdoc': ucdoc}
actions.append(one_doc)
helpers.bulk(es, actions)
else:
sc = SparkContext()
hive_context = HiveContext(sc)

command = """
DROP TABLE IF EXISTS factdata
"""
hive_context.sql(command)

command = """
CREATE TABLE IF NOT EXISTS factdata
(
uckey string,
bucket_id int,
count_array array<string>,
hour int
)
partitioned by (day string)
"""
hive_context.setConf("hive.exec.dynamic.partition", "true")
hive_context.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
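        # With dynamic, nonstrict partitioning enabled, each insert routes
        # rows into the matching day= partition automatically.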
hive_context.sql(command)

bucket_size = 64
push_ucdocs_to_hive(ucdocs.values(), bucket_size)
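
As a quick sanity check (hypothetical, not part of this commit), the generated table can be inspected from the same Spark/Hive session:

    # Hypothetical follow-up: row counts per day partition of factdata.
    hive_context.sql(
        "SELECT day, COUNT(*) AS cnt FROM factdata GROUP BY day ORDER BY day"
    ).show()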
