## Function

### sheet_parser

In [1]:
# Reads every spreadsheet tab export referenced by +obj+ and returns the feed
# rows of all tabs as one flat array.
#
# obj - Hash; obj[:path][:children][:spreadsheet][:files] maps a tab name to a
#       hash whose :full entry is the path of a JSON cell-feed export
#       ( json["feed"]["entry"][n]["gs$cell"] with "row", "col" and "$t" ).
#
# Returns an Array of hashes with the keys :rss, :category, :name and :source.
# Raises URI::InvalidURIError when a cell in column 1 is not a parsable URL.
def sheet_parser( obj )
  # Data rows start here; rows above are skipped.
  first_row = 6

  # Rebuilds the feed URL as scheme://host/path and drops the "category"
  # query parameter. uri.query is nil for URLs without "?", which made the
  # previous CGI::parse call raise.
  normalize_url = lambda do | text |
    uri = URI.parse( text )
    url = "#{uri.scheme}://#{uri.host}#{uri.path}"
    params = URI
      .decode_www_form( uri.query.to_s )
      .reject { | name, _value | name == 'category' }
    params.empty? ? url : "#{url}?#{URI.encode_www_form( params )}"
  end

  # Kept as a lambda on purpose: the former nested `def sheet_parser( json )`
  # replaced this very method on its first call, so any second call with
  # +obj+ failed.
  parse_tab = lambda do | json |
    # Index the cells once by [ row, column ] instead of scanning the whole
    # feed for every lookup. The first cell seen for a position wins.
    cells = {}
    ( json["feed"]["entry"] || [] ).each do | entry |
      cell = entry['gs$cell']
      position = [ cell['row'].to_i, cell['col'].to_i ]
      cells[ position ] = cell['$t'] unless cells.key?( position )
    end

    # Without this guard an empty sheet would build the endless range ( 6..nil ).
    last_row = cells.keys.map( &:first ).max
    return [] if last_row.nil?

    ( first_row..last_row ).map do | row |
      item = { rss: nil, category: nil, name: nil, source: nil }

      { rss: 1, category: 4, name: 5 }.each do | field, column |
        text = cells[ [ row, column ] ]
        item[ field ] =
          if text.nil?
            ''
          elsif column == 1
            normalize_url.call( text )
          else
            text
          end
      end

      item[:source] =
        case URI.parse( item[:rss] ).host
        when 'www.youtube.com' then 'yt'
        when 'www.google.com' then 'ga'
        end

      item
    end
  end

  result = []
  files = obj[:path][:children][:spreadsheet][:files]
  files.each_value.with_index do | tab, index |
    path = tab[:full]
    json = JSON.parse( File.read( path ) )

    print "[#{index}]  #{path}"
    result.concat( parse_tab.call( json ) )
    puts
  end
  return result
end

:sheet_parser

### sheet_clean

In [2]:
# Produces the shortened feed list: the entry file without its :source
# fields, without the categories 'Rap' and 'Other', and without YouTube feeds
# that the published status report at +url+ lists as errors.
#
# url - String URL of the status JSON ( read via HTTP GET ).
# obj - Hash; obj[:path][:children][:curlai][:files][:entry][:full] is the
#       path of the entry JSON file.
# env - not used by this method.
#
# Returns a Hash of the form { data: [ ... ] }.
def sheet_clean( url, obj, env )
  status = JSON.parse( Net::HTTP.get( URI( url ) ) )

  # URLs of every failed feed across all groups, restricted to YouTube.
  broken = status['groups']
    .values
    .flat_map { | group | group['errors'].map { | error | error['url'] } }
    .select { | link | URI.parse( link ).host.eql?( 'www.youtube.com' ) }

  entry_path = obj[:path][:children][:curlai][:files][:entry][:full]
  entries = JSON.parse( File.read( entry_path ) ).with_indifferent_access

  rows = entries[:data].map { | row | row.except( :source ) }
  kept = rows.reject do | row |
    row[:category].eql?( 'Rap' ) ||
      row[:category].eql?( 'Other' ) ||
      broken.include?( row[:rss] )
  end

  { data: kept }
end

:sheet_clean

### hash_to_rss

In [3]:
# Renders +entries+ as an Atom feed document.
#
# title   - String used as the channel title.
# entries - Array of hashes providing :url, :title, :description and
#           [:time][:utc] for each feed item.
#
# Returns the feed XML as a String in which every `<link href="` has been
# rewritten to `<link rel="alternate" href="`.
def hash_to_rss( title, entries )
  feed = RSS::Maker.make( 'atom' ) do | maker |
    channel = maker.channel
    channel.author = ''
    channel.updated = Time.now.to_s
    channel.about = ''
    channel.title = title

    entries.each do | entry |
      maker.items.new_item do | item |
        item.link = entry[:url]
        item.title = entry[:title]
        item.description = entry[:description]
        item.updated = entry[:time][:utc]
      end
    end
  end

  feed.to_s.gsub( '<link href="', '<link rel="alternate" href="' )
end

:hash_to_rss

### helper_remove_html

In [4]:
# Strips markup from +html+ and normalises the remaining text.
#
# HTML entities are unescaped first, the text nodes of the parsed document are
# concatenated, and every whitespace-separated word is passed through
# String#capitalize before the words are re-joined with single spaces.
#
# html - String that may contain HTML markup and entities.
#
# Returns the cleaned String.
def helper_remove_html( html )
  document = Nokogiri::HTML( CGI.unescapeHTML( html ) )

  text = ''
  document.traverse { | node | text << node.text if node.text? }

  text.strip.split( ' ' ).map( &:capitalize ).join( ' ' )
end

:helper_remove_html

### helper_s3_url

In [5]:
# Assembles the public S3 URL of an uploaded file.
#
# env      - Hash providing :AWS_BUCKET_NAME, :AWS_REGION and :AWS_VERSION.
# obj      - Hash; obj[:struct][:folders][ type ][:name] is the folder prefix.
# type     - key of the folder inside obj[:struct][:folders].
# filename - String file name appended last.
#
# Returns the URL String
# "https://<bucket>.s3.<region>.amazonaws.com/<version><folder><filename>".
def helper_s3_url( env, obj, type, filename )
  segments = [
    'https://',
    env[:AWS_BUCKET_NAME],
    '.s3.',
    env[:AWS_REGION],
    '.amazonaws.com/',
    env[:AWS_VERSION],
    obj[:struct][:folders][ type ][:name],
    filename
  ]

  # Appending with << keeps the original failure on a missing ( nil ) segment.
  segments.each_with_object( +'' ) { | segment, url | url << segment }
end

:helper_s3_url

### download_rss

In [6]:
# Fetches +url+ with an HTTP GET and parses the response body as XML.
#
# url - String URL of the feed.
#
# Returns the parsed Nokogiri XML document.
def download_rss( url )
  body = Net::HTTP.get( URI( url ) )
  Nokogiri::XML( body )
end

:download_rss

### extract_youtube

In [7]:
# Builds the link to the S3-hosted viewer page for one video.
#
# item    - Hash with :url ( original video link carrying a "v" query
#           parameter ) and :title.
# obj     - Hash providing the obj[:s3] settings and the viewer folder/file
#           names under obj[:struct][:folders][:viewer].
# channel - String feed URL carrying a "channel_id" query parameter.
#
# Returns the viewer URL String including the encoded query.
def url_html_view( item, obj, channel )
  query = {
    v: 123,
    video_id: CGI::parse( item[:url].split( '?' )[ 1 ] )['v'][ 0 ],
    channel_id: CGI.parse( channel.split( '?' )[ 1 ] )['channel_id'][ 0 ],
    title: item[:title]
  }

  result = ''
  result << 'https://'
  result << obj[:s3][:bucket_name]
  result << '.s3.'
  result << obj[:s3][:region]
  result << '.amazonaws.com/'
  result << obj[:s3][:bucket_sub_folder]
  result << obj[:struct][:folders][:viewer][:name]
  result << obj[:struct][:folders][:viewer][:files][:yt][:name]
  result << '?'
  result << URI.encode_www_form( query )

  return result
end

# Converts a parsed YouTube Atom feed into a plain hash.
#
# doc - parsed XML document of the feed ( as returned by download_rss ).
# obj - configuration Hash, passed on to url_html_view.
#
# Returns a Hash { meta: { title:, url: }, items: [ ... ] }. Every item has
# :title, :description ( at most 140 characters ), :time ( :stamp as epoch
# seconds and :utc as the published text ), :url ( viewer link ) and :domain.
def extract_youtube( doc, obj )
  threshold = 140

  root = doc.at( 'feed' )
  feed = {
    meta: {
      title: root.search( 'title' )[ 0 ].text.gsub( '"', "'" ),
      url: root.search( 'link' )[ 0 ].attribute( 'href' ).text
    },
    items: []
  }

  root.search( 'entry' ).each do | entry |
    item = {
      title: nil,
      description: nil,
      time: {
        stamp: nil,
        utc: nil
      }
    }

    item[:title] = ''
    item[:title] << "▫️ "
    item[:title] << feed[:meta][:title].upcase
    item[:title] << " | "
    item[:title] << helper_remove_html( entry.at( 'title' ).text )

    # The leading "." keeps the search inside this entry. The previous
    # "//media:description" was evaluated from the document root, so every
    # item received the concatenated descriptions of the whole feed.
    item[:description] = helper_remove_html( entry.xpath( ".//media:description" ).text )
    if item[:description].length >= threshold
      item[:description] = item[:description][ 0, threshold ]
    end

    published = entry.at( 'published' ).text
    item[:time][:stamp] = Time.parse( published ).to_i
    item[:time][:utc] = published

    # url_html_view reads the original video link from item[:url] before the
    # viewer link replaces it.
    item[:url] = entry.at( 'link' ).attribute( 'href' ).text
    item[:url] = url_html_view( item, obj, feed[:meta][:url] )

    # Last two labels of the viewer link's host name.
    item[:domain] = URI( item[:url] )
      .host
      .split( '.' )[ -2, 2 ]
      .join( '.' )

    feed[:items].push( item )
  end

  return feed
end

:extract_youtube

### analyse_single

In [8]:
# Downloads a single feed and extracts it with the YouTube parser.
#
# url - String feed URL.
# obj - configuration Hash handed to extract_youtube.
#
# Returns whatever extract_youtube returns for the downloaded document.
def analyse_single( url, obj )
  extract_youtube( download_rss( url ), obj )
end

:analyse_single

### analyse_groups

In [9]:
# Downloads and parses every feed in +datas+, grouped by category.
#
# datas - Hash; datas[:data] is an Array of hashes with :rss and :category.
# obj   - configuration Hash handed to extract_youtube.
#
# Returns a Hash keyed by category. Each value is
# { time: <elapsed seconds>, feeds: [ parsed feeds ], errors: [ { url:, message: } ] }.
# A feed URL ends up in exactly one of :feeds or :errors.
def analyse_groups( datas, obj )
  groups = datas[:data].group_by { | item | item[:category] }

  result = {}
  groups.each_with_index do | ( key, items ), rindex |
    started = Time.now.to_i

    ConsoleHashArray.console( groups, rindex, nil, key, :left )
    feeds = []
    errors = []
    items.each_with_index do | item, cindex |
      # One-second pause between downloads.
      sleep 1
      ConsoleHashArray.console( groups, rindex, cindex, key, :right )
      url = item[:rss]
      begin
        doc = download_rss( url )
        case URI.parse( url ).host
        when 'www.youtube.com'
          feeds.push( extract_youtube( doc, obj ) )
        else
          # Only the error is recorded. An empty placeholder used to be pushed
          # to feeds as well, counting the URL as success and failure at once.
          errors.push( {
            url: url,
            message: 'not parser found'
          } )
        end
      rescue => e
        errors.push( {
          url: url,
          message: e
        } )
      end
    end

    result[ key ] = {
      time: Time.now.to_i - started,
      feeds: feeds,
      errors: errors
    }
  end
  return result
end

:analyse_groups

### merge_groups

In [10]:
# Merges the parsed feeds of every category into one list of entries,
# newest first.
#
# groups_analyse - Hash keyed by category; each value has :feeds.
# obj            - Hash; obj[:struct][:categories] is searched for the entry
#                  whose :name equals the category key.
#
# Returns a Hash keyed by category with :status ( { message: } ), :category
# and :entries. A failure while merging one category is captured in its
# status message ( "Error: ..." ) instead of being raised.
def merge_groups( groups_analyse, obj )
  groups_analyse.each_with_object( {} ) do | ( key, analysis ), merged |
    group = {
      status: { message: 'on progress' },
      category: {},
      entries: []
    }
    merged[ key ] = group

    begin
      group[:category] = obj[:struct][:categories].find { | c | c[:name].eql?( key ) }

      # Only plain Hash feeds carry items; anything else is skipped.
      group[:entries] = analysis[:feeds]
        .select { | feed | feed.class == Hash }
        .flat_map { | feed | feed[:items] }
        .sort_by { | entry | -entry[:time][:stamp] }

      group[:status] = { message: 'success' }
    rescue => e
      group[:status] = { message: "Error: #{e}" }
    end
  end
end

:merge_groups

### upload_groups

In [11]:
# Renders every merged category as an Atom feed and uploads it.
#
# merged - Hash keyed by category; each value has :category ( with :name and
#          :file ) and :entries.
# obj    - configuration Hash handed to upload_single.
# env    - environment Hash handed to upload_single.
#
# Returns { time: <elapsed seconds>, responses: { key => { status:, message: } } }.
# On success :message is the value returned by upload_single; on failure
# :status is 'error' and :message is the rescued exception object.
def upload_groups( merged, obj, env )
  started = Time.now.to_i
  responses = {}

  merged.each_with_index do | ( key, group ), rindex |
    begin
      ConsoleHashArray.console( merged, rindex, nil, key, :left )

      rss = hash_to_rss( group[:category][:name], group[:entries] )
      location = upload_single( rss, :categories, group[:category][:file], obj, env )

      responses[ key ] = { status: 'success', message: location }
      puts "Success: #{location}"
    rescue => e
      responses[ key ] = { status: 'error', message: e }
      puts 'error!'
    end
  end

  { time: Time.now.to_i - started, responses: responses }
end

:upload_groups

### upload_single

In [12]:
# Uploads +body+ to S3 and returns the resulting public URL.
#
# body         - content to store.
# type         - key of the target folder inside obj[:struct][:folders].
# filename     - String object name inside that folder.
# obj          - Hash providing obj[:s3][:bucket_name], obj[:s3][:access_acl]
#                and the folder names.
# env          - Hash providing :AWS_REGION, :AWS_ID, :AWS_SECRET and
#                :AWS_VERSION.
# content_type - String stored as the object's content type.
#
# Returns the URL built by helper_s3_url for the same type and filename.
def upload_single( body, type, filename, obj, env, content_type = 'application/xml' )
  s3 = Aws::S3::Client.new(
    region: env[:AWS_REGION],
    access_key_id: env[:AWS_ID],
    secret_access_key: env[:AWS_SECRET]
  )

  # Object key: <version prefix><folder name><file name>.
  object_key = '' << env[:AWS_VERSION] << obj[:struct][:folders][ type ][:name] << filename

  s3.put_object(
    bucket: obj[:s3][:bucket_name],
    content_type: content_type,
    key: object_key,
    body: body,
    acl: obj[:s3][:access_acl]
  )

  helper_s3_url( env, obj, type, filename )
end

:upload_single

### upload_status

In [13]:
# Builds the status report of a complete run and uploads it as JSON.
#
# datas    - Hash; datas[:data] is the Array of feed rows ( :rss, :name ).
# analyse  - result of analyse_groups ( per key :time, :feeds, :errors ).
# merged   - result of merge_groups ( per key [:status][:message] ).
# uploaded - result of upload_groups ( :time and :responses per key ).
# obj      - configuration Hash; the file name is read from
#            obj[:struct][:folders][:status][:files][:name].
# env      - environment Hash handed to upload_single.
#
# Returns the String "URL: <uploaded url> ".
def upload_status( datas, analyse, merged, uploaded, obj, env )
  # Minutes come from integer division, so runs longer than an hour no
  # longer wrap around as they did with strftime( "%M" ).
  format_duration = lambda do | seconds |
    minutes, rest = seconds.to_i.divmod( 60 )
    format( '%02dm %02ds', minutes, rest )
  end

  groups = {}
  counts = []
  analyse.each do | key, report |
    succeeded = report[:feeds].length
    failed = report[:errors].length
    counts.push( [ succeeded, failed, succeeded + failed ] )

    errors = report[:errors].map do | item |
      row = datas[:data].find { | a | a[:rss].eql?( item[:url] ) }
      # An error URL missing from datas yields a nil name instead of a crash.
      { name: row ? row[:name] : nil, url: item[:url] }
    end

    response = uploaded[:responses][ key ] || {}
    # upload_groups stores the rescued exception object as message on failure,
    # so only String messages can be a source URL.
    message = response[:message]
    source = message.is_a?( String ) && message.start_with?( 'http' ) ? message : nil

    groups[ key ] = {
      source: source,
      status: {
        analyse: "(#{succeeded}, #{failed}, #{succeeded + failed})",
        merge: merged.dig( key, :status, :message ),
        upload: response[:status]
      },
      errors: errors,
      time: report[:time]
    }
  end

  # Column sums over all groups; all zero when there are no groups.
  succeeded, failed, total = counts.empty? ? [ 0, 0, 0 ] : counts.transpose.map( &:sum )

  analyse_seconds = analyse.values.sum { | report | report[:time] }
  upload_seconds = uploaded[:time].to_i

  # Per-host counters, created on first use so that an error host that is not
  # part of datas cannot break the report.
  types = Hash.new { | all, host | all[ host ] = { total: 0, error: 0, success: 0 } }
  datas[:data].each { | a | types[ URI.parse( a[:rss] ).host ][:total] += 1 }
  groups.each_value do | group |
    group[:errors].each { | e | types[ URI.parse( e[:url] ).host ][:error] += 1 }
  end
  types.each_value { | counter | counter[:success] = counter[:total] - counter[:error] }

  messages = {
    overview: {
      time: {
        now: Time.now,
        analyse: format_duration.call( analyse_seconds ),
        upload: format_duration.call( upload_seconds ),
        total: format_duration.call( analyse_seconds + upload_seconds )
      },
      all: {
        total: total,
        error: failed,
        success: succeeded
      },
      types: types
    },
    groups: groups
  }

  str = JSON.pretty_generate( messages )
  response = upload_single(
    str,
    :status,
    obj[:struct][:folders][:status][:files][:name],
    obj,
    env,
    'application/json'
  )
  return "URL: #{response} "
end

:upload_status

### upload_opml

In [14]:
# Builds the OPML file of every slot plus a zip archive of all of them and
# uploads everything into the :opml folder, printing each resulting URL.
#
# obj - configuration Hash handed to opml_prepare and upload_single.
# env - environment Hash handed to opml_prepare and upload_single.
#
# Returns nil.
def upload_opml( obj, env )
  opmls = opml_prepare( obj, env )
  zip = zip_prepare( opmls )

  opmls.each_value.with_index do | opml, index |
    location = upload_single( opml[:body], :opml, opml[:meta][:file], obj, env )
    puts "[#{index}] #{location}"
  end
  puts

  # The archive is binary; upload_single's default 'application/xml' content
  # type would mislabel it.
  location = upload_single( zip, :opml, 'opmls.zip', obj, env, 'application/zip' )
  puts "Opml: #{location}"
end

:upload_opml

### upload_viewer

In [15]:
# Uploads every *.html file of the local viewer folder into the :viewer
# folder and prints each resulting URL.
#
# obj - Hash; obj[:path][:children][:viewer][:full] is the local folder.
# env - environment Hash handed to upload_single.
#
# Returns true.
def upload_viewer( obj, env )
  folder = obj[:path][:children][:viewer][:full]

  # File.join copes with a folder path with or without a trailing separator;
  # plain string concatenation silently matched nothing without one.
  Dir[ File.join( folder, '*.html' ) ].each do | file |
    # The pages are HTML; the empty content type passed before left the
    # uploaded objects without a usable type.
    response = upload_single(
      File.read( file ),
      :viewer,
      File.basename( file ),
      obj,
      env,
      'text/html'
    )
    puts response
  end
  return true
end

:upload_viewer

### opml_prepare

In [16]:
# Creates one OPML document per slot, listing the category feeds assigned to
# that slot.
#
# obj - Hash; obj[:struct][:slots] maps a slot key to its meta data and
#       obj[:struct][:categories] lists categories with :name, :file, :slot.
# env - environment Hash handed to helper_s3_url.
#
# Returns a Hash keyed by slot with { meta: <slot meta>, body: <OPML XML String> }.
def opml_prepare( obj, env )
  categories = obj[:struct][:categories]

  obj[:struct][:slots].each_with_object( {} ) do | ( slot, meta ), result |
    # One outline per category of this slot, pointing at its uploaded feed.
    outlines = categories
      .select { | category | category[:slot].eql?( slot ) }
      .map do | category |
        url = helper_s3_url( env, obj, :categories, category[:file] )
        {
          text: category[:name],
          title: category[:name],
          type: "rss",
          xmlUrl: url,
          htmlUrl: url
        }
      end

    builder = Nokogiri::XML::Builder.new( encoding: 'UTF-8' ) do | xml |
      xml.opml( version: "1.0" ) do
        xml.head { xml.title 'title' }
        xml.body { outlines.each { | outline | xml.outline( outline ) } }
      end
    end

    result[ slot ] = { meta: meta, body: builder.to_xml }
  end
end

:opml_prepare

### zip_prepare

In [30]:
# Packs the body of every OPML document into an in-memory zip archive.
#
# opmls - Hash whose values have [:meta][:file] ( entry name ) and :body.
#
# Returns the archive content as a String.
def zip_prepare( opmls )
  buffer = Zip::OutputStream.write_buffer do | archive |
    opmls.each_value do | opml |
      archive.put_next_entry( opml[:meta][:file] )
      archive.write( opml[:body] )
    end
  end
  buffer.string
end

:zip_prepare

## Execute

In [31]:
require 'open-uri'
require 'local_path_builder'
require 'net/http'
require "active_support/core_ext/hash/indifferent_access"
require 'json'
require 'active_support/core_ext/hash'
require 'nokogiri'
require 'rss'
require 'aws-sdk-s3'
require 'faker'
require 'console_hash_array'
require 'zip'

false

In [32]:
# Load the KEY=VALUE pairs of ./.env into a hash with indifferent access.
# Blank lines, "#" comment lines and lines without "=" are skipped; they used
# to make to_h raise. Splitting on the first "=" only keeps values that
# contain "=" intact and accepts empty values ( "KEY=" ).
env = File
  .readlines( './.env', chomp: true )
  .map( &:strip )
  .reject { | line | line.empty? || line.start_with?( '#' ) || !line.include?( '=' ) }
  .map { | line | line.split( '=', 2 ) }
  .to_h
  .with_indifferent_access

# Static configuration shared by all cells below: the spreadsheet id, the
# local working folders/files, the remote folder layout and feed categories,
# and the S3 settings.
hash = {
  spreadsheet: {
    id: '12vzGqg_KaXDvjYkFQAsLqQ1aEsUlZbaOOPiadYREpZE'
  },
  # Local folder tree; handed to LocalPathBuilder.generate at the end of this cell.
  path: {
    root: './',
    name: 'files',
    children: {
      spreadsheet: {
        name: '0-spreadsheets',
        files: {
          tab1: {
            name: '1-youtube.json'
          },
          tab2: {
            name: '2-alerts.json'
          }
        }
      },
      curlai: {
        name: '1-curlai',
        files: {
          short: { name: '2-short.json' },
          entry: { name: '1-entry.json' },
          meta: { name: '0-meta.json' }
        }
      },
      viewer: {
        name: '2-viewer'
      }
    }
  },
  struct: {
    # Remote folder names; helper_s3_url and upload_single append them to the
    # AWS_VERSION prefix.
    folders: {
      status: {
        name: 'status/',
        files: {
          name: 'status.json'
        }
      },
      opml: {
        name: 'opml/'
      },
      categories: {
        name: 'categories/',
      },
      viewer: {
        name: 'viewer/',
        files: {
          yt: {
            name: 'yt.html'
          }
        }
      }
    },
    opml: {},
    # One OPML file is generated per slot ( see opml_prepare ).
    slots: {
      morning: { name: 'Morning', file: 'morning.opml' },
      evening: { name: 'Evening', file: 'evening.opml' },
      news: { name: 'News', file: 'news.opml' },
      other: { name: 'Other', file: 'other.opml' }
    },
    # Feed categories: :name is matched against the category of a feed row,
    # :file is the uploaded feed file, :slot selects the OPML file listing it.
    categories: [
      { name: 'Music', file: 'music.xml', slot: :evening },
      { name: 'Art', file: 'art.xml', slot: :evening },
      { name: 'Sport', file: 'sport.xml', slot: :evening },
      { name: 'Programming', file: 'programming.xml', slot: :evening },
      { name: 'Universe', file: 'universe.xml', slot: :evening },
      { name: 'Lifestyle', file: 'lifestyle.xml', slot: :evening },
      { name: 'Philosophie', file: 'philosophie.xml', slot: :evening },
      { name: 'Talks Tech', file: 'talks-tech.xml', slot: :morning },
      { name: 'Startup', file: 'startup.xml', slot: :morning },
      { name: 'Hype Technologies', file: 'hype-technologies.xml', slot: :morning },
      { name: 'Artificial Intelligence', file: 'artificial-intelligence.xml', slot: :morning },
      { name: 'Research', file: 'research.xml', slot: :morning },
      { name: 'Development', file: 'development.xml', slot: :morning },
      { name: 'Talks Business', file: 'talks-business.xml', slot: :morning },
      { name: 'Hypecycle (A-D)', file: 'hypecycle-(a-d).xml', slot: :news },
      { name: 'Hypecycle (E-L)', file: 'hypecycle-(e-l).xml', slot: :news },
      { name: 'Hypecycle (M-R)', file: 'hypecycle-(m-r).xml', slot: :news },
      { name: 'Hypecycle (S-Z)', file: 'hypecycle-(s-z).xml', slot: :news },
      { name: 'Persons', file: 'persons.xml', slot: :news },
      { name: 'Programming Topics', file: 'programming-topics.xml', slot: :news },
      { name: 'Politics', file: 'politics.xml', slot: :news },
      { name: 'Other', file: 'other.xml', slot: :other },
      { name: 'Rap', file: 'rap.xml', slot: :news }
    ]
  },
  # The nil values are filled from env right after this literal.
  s3: {
    bucket_name: nil,
    bucket_sub_folder: nil,
    region: nil,
    access_acl: 'public-read',
    filename_with_subfolder: nil,
  }
}

# Copy the S3 settings from the environment file into the configuration.
hash[:s3][:bucket_name] = env[:AWS_BUCKET_NAME]
hash[:s3][:bucket_sub_folder] = env[:AWS_VERSION]
hash[:s3][:region] = env[:AWS_REGION]

# Disabled code, kept for reference:
# hash[:struct][:meta][:opml] = "#{hash[:struct][:meta][:s3]}opml.zip"

# hash[:struct][:categories].each.with_index do | a, index |
#  hash[:struct][:categories][ index ][:path] = 
#  "#{hash[:struct][:meta][:s3]}#{hash[:struct][:categories][ index ][:file]}"
# end

# NOTE(review): presumably this adds the :full paths that later cells read
# ( e.g. hash[:path][:children][:curlai][:files][:entry][:full] ) — confirm
# against the local_path_builder gem.
LocalPathBuilder.generate( hash[:path], :both )
puts

Errno::ENOENT: No such file or directory @ rb_sysopen - ./.env

### Upload opml

In [20]:
# Build the OPML files and their zip archive and upload them.
upload_opml( hash, env )

NoMethodError: undefined method `[]' for nil:NilClass

In [21]:
# Scratch cell: only prints the marker 'A'.
puts 'A'

A


### Upload Viewer

In [22]:
# Upload the *.html files of the local viewer folder.
upload_viewer( hash, env )

NoMethodError: undefined method `[]' for nil:NilClass

### Data Import

In [23]:
# Write the structure part of the configuration to the meta file.
path = hash[:path][:children][:curlai][:files][:meta][:full]
File.open( path, "w" ) { | f | f.write( JSON.pretty_generate( hash[:struct] ) ) }

# Parse all spreadsheet tabs and store the rows in the entry file.
# The parser defined in this notebook is sheet_parser; the previously called
# spreadsheet_parser is not defined anywhere in it.
items = { data: sheet_parser( hash ) } 
path = hash[:path][:children][:curlai][:files][:entry][:full]
File.open( path, "w" ) { | f | f.write( JSON.pretty_generate( items ) ) }
puts

NoMethodError: undefined method `[]' for nil:NilClass

### Data Clean Up

In [24]:
# Fetch the published status report, drop the feeds sheet_clean filters out
# and store the remaining rows in the short file.
url = helper_s3_url( env, hash, :status, hash[:struct][:folders][:status][:files][:name] )
data = sheet_clean( url, hash, env )
path = hash[:path][:children][:curlai][:files][:short][:full]
File.open( path, "w" ) { | f | f.write( JSON.pretty_generate( data ) ) }

NoMethodError: undefined method `[]' for nil:NilClass

### Analyse Single

In [25]:
# Run the single-feed analysis on the first row of the entry file.
path = hash[:path][:children][:curlai][:files][:entry][:full]
file = File.read( path )
datas = JSON.parse( file ).with_indifferent_access
url = datas[:data][ 0 ][:rss]
single = analyse_single( url, hash )
puts

NoMethodError: undefined method `[]' for nil:NilClass

### Analyse Group

In [26]:
# Analyse every feed of the short file per category, then merge the parsed
# feeds of each category. datas, groups_analyse and groups_merged are read
# again by the upload cells below.
path = hash[:path][:children][:curlai][:files][:short][:full]
file = File.read( path )
datas = JSON.parse( file ).with_indifferent_access
groups_analyse = analyse_groups( datas, hash )
groups_merged = merge_groups( groups_analyse, hash )
puts

NoMethodError: undefined method `[]' for nil:NilClass

### Upload Group

In [27]:
# Upload one feed per merged category; groups_upload is read by the status cell.
groups_upload = upload_groups( groups_merged, hash, env )
puts

NoMethodError: undefined method `keys' for nil:NilClass

### Upload Status

In [28]:
# Build the status report from the results of the previous cells and upload it.
upload_status( 
  datas,
  groups_analyse,
  groups_merged,
  groups_upload,
  hash, 
  env
)

NoMethodError: undefined method `keys' for nil:NilClass

In [29]:
# Inspect the first merged entry of the 'Art' category.
groups_merged['Art'][:entries][ 0 ]

NoMethodError: undefined method `[]' for nil:NilClass